Skip to content

Commit

Permalink
Fix Python 3.8 compatibility issue, remove import expr, bump version …
Browse files Browse the repository at this point in the history
…to 1.3, fixes #4
  • Loading branch information
刘宝 committed Dec 8, 2019
1 parent 3d17a6a commit de3b6c3
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 35 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
HISTORY
--------

## 1.3 (2019-12-8)
- Fix python 3.8 compatibility issue [#4](https://github.com/fyrestone/pycode_similar/issues/4) (Thanks to aginiewicz)
- Remove import expr

## 1.2 (2018-1-18)

- Fix python3 compatibility issue.
Expand Down
54 changes: 41 additions & 13 deletions pycode_similar/pycode_similar.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
# -*- coding: utf-8 -*-
__author__ = '[email protected]'
__version__ = '1.2'
__version__ = '1.3'

import sys
import ast
import difflib
import operator
import argparse
import itertools
import collections
from collections import Counter

# avoid using six to keep dependency clean
if sys.version_info >= (3, 3):
import collections.abc as collections
else:
import collections

if sys.version_info[0] == 3:
string_types = str
else:
string_types = basestring


class FuncNodeCollector(ast.NodeTransformer):
Expand Down Expand Up @@ -75,6 +86,15 @@ def generic_visit(self, node):
self._mark_docstring_sub_nodes(node)
return super(FuncNodeCollector, self).generic_visit(node)

def visit_Constant(self, node):
    """Blank out the text of a string constant so literals compare equal.

    Added for Python 3.8, where literals are parsed as ``ast.Constant``.
    Only ``str`` payloads are replaced; every other constant value is
    left untouched.

    :param node: the ``ast.Constant`` node being visited.
    :return: ``None`` (the result of ``generic_visit`` is not propagated,
        same as the sibling ``visit_Str``).
    """
    # Introduce a special value for erasing the constant's value:
    # `del node.value` would make node.s and node.n raise an exception.
    dummy_value = '__pycode_similar_dummy_value__'
    # Inspect the payload, not the node itself: `node` is an AST node
    # object, so `type(node) == str` could never be true and the value
    # was never replaced.
    if isinstance(node.value, str):
        node.value = dummy_value
    self.generic_visit(node)

def visit_Str(self, node):
del node.s
self.generic_visit(node)
Expand Down Expand Up @@ -159,6 +179,14 @@ def visit_Print(self, node):
# remove print expr for python2
pass

def visit_Import(self, node):
# remove import ...
pass

def visit_ImportFrom(self, node):
# remove from ... import ...
pass

def clear(self):
self._func_nodes = []

Expand Down Expand Up @@ -229,7 +257,7 @@ def func_ast_lines(self):
def _retrieve_func_code_lines(func_node, code_lines):
if not isinstance(func_node, ast.FunctionDef):
return []
if not isinstance(code_lines, collections.Sequence) or isinstance(code_lines, basestring):
if not isinstance(code_lines, collections.Sequence) or isinstance(code_lines, string_types):
return []
if getattr(func_node, 'endlineno', -1) < getattr(func_node, 'lineno', 0):
return []
Expand Down Expand Up @@ -335,11 +363,11 @@ def __str__(self):
if isinstance(self.info_ref, FuncInfo) and isinstance(self.info_candidate, FuncInfo):
return '{:<4.2}: ref {}, candidate {}'.format(self.plagiarism_percent,
self.info_ref.func_name + '<' + str(
self.info_ref.func_node.lineno) + ':' + str(
self.info_ref.func_node.col_offset) + '>',
self.info_ref.func_node.lineno) + ':' + str(
self.info_ref.func_node.col_offset) + '>',
self.info_candidate.func_name + '<' + str(
self.info_candidate.func_node.lineno) + ':' + str(
self.info_candidate.func_node.col_offset) + '>')
self.info_candidate.func_node.lineno) + ':' + str(
self.info_candidate.func_node.col_offset) + '>')
return '{:<4.2}: ref {}, candidate {}'.format(0, None, None)


Expand Down Expand Up @@ -372,7 +400,7 @@ def _gen():
for line in b[j1:j2]:
yield '+'

return collections.Counter(_gen())['-']
return Counter(_gen())['-']

@staticmethod
def total(a, b):
Expand Down Expand Up @@ -527,12 +555,12 @@ def get_file(value):
sum_total_count = sum(func_diff_info.total_count for func_diff_info in func_ast_diff_list)
sum_plagiarism_count = sum(func_diff_info.plagiarism_count for func_diff_info in func_ast_diff_list)
print('{:.2f} % ({}/{}) of ref code structure is plagiarized by candidate.'.format(
sum_plagiarism_count / float(sum_total_count) * 100,
sum_plagiarism_count,
sum_total_count))
sum_plagiarism_count / float(sum_total_count) * 100,
sum_plagiarism_count,
sum_total_count))
print('candidate function plagiarism details (AST lines >= {} and plagiarism percentage >= {}):'.format(
args.l,
args.p,
args.l,
args.p,
))
output_count = 0
for func_diff_info in func_ast_diff_list:
Expand Down
60 changes: 38 additions & 22 deletions pycode_similar/tests/test_cases.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import sys

sys.path.insert(0, os.path.realpath(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))))

import unittest
import pycode_similar


class TestCases(unittest.TestCase):

def test_basic_detect(self):
Expand All @@ -28,71 +30,71 @@ def bar(self):
self.assertGreater(result[0][1][0].plagiarism_percent, 0.5)

def test_name(self):
s1 = """
s1 = """
def foo(a):
if a > 1:
return True
return False
"""
s2 = """
s2 = """
def bar(b):
if b > 1:
return True
return False
"""
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_equal(self):
s1 = """
s1 = """
def foo(a):
if a == 1:
return True
return False
"""
s2 = """
s2 = """
def bar(b):
if 1 == b:
return True
return False
"""
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_gt_lt(self):
s1 = """
s1 = """
def foo(a):
if a > 1:
return True
return False
"""
s2 = """
s2 = """
def bar(b):
if 1 < b:
return True
return False
"""
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_gte_lte(self):
s1 = """
s1 = """
def foo(a):
if a >= 1:
return True
return False
"""
s2 = """
s2 = """
def bar(b):
if 1 <= b:
return True
return False
"""
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_space_and_comments(self):
s1 = """
s1 = """
def foo(a):
\"""
foo comments.
Expand All @@ -103,7 +105,7 @@ def foo(a):
# this should return False
return False
"""
s2 = """
s2 = """
def bar(b):
# bar comments.
if 1 <= b:
Expand All @@ -114,8 +116,8 @@ def bar(b):
return True
return False
"""
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_expr(self):
s1 = """
Expand Down Expand Up @@ -160,9 +162,23 @@ def foo(a):
result = pycode_similar.detect([s1, s2])
self.assertEqual(result[0][1][0].plagiarism_percent, 1)

def test_strip_import(self):
# Both snippets define a function whose body is made up solely of import
# statements; the modules imported and the number of statements differ.
s1 = """
def foo():
import sys
from os import path
"""
s2 = """
def foo():
import os
import ast
from collections import Counter
"""

result = pycode_similar.detect([s1, s2])
# Despite the differing imports, the first reported function comparison is
# expected to have a plagiarism_percent of exactly 1.
self.assertEqual(result[0][1][0].plagiarism_percent, 1)


if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.test_reload_custom_code_after_changes_in_class']
# import sys;sys.argv = ['', 'Test.test_reload_custom_code_after_changes_in_class']
unittest.main()

0 comments on commit de3b6c3

Please sign in to comment.