-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathText similarity detection.txt
More file actions
47 lines (33 loc) · 1.31 KB
/
Text similarity detection.txt
File metadata and controls
47 lines (33 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
### this is not included with numpy files
import math
import re
from collections import Counter
## Plagiarism / text similarity detection program.
# Precompiled pattern matching one run of word characters (\w+); it is used
# to pull the individual tokens out of an input text.
WORD = re.compile(r"\w+")
def get_cosine(vec1, vec2):
    """Return the cosine similarity of two sparse term-weight vectors.

    Args:
        vec1: Mapping of term -> numeric weight (e.g. a ``Counter`` of words).
        vec2: Mapping of term -> numeric weight.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms, as a float. ``0.0`` is returned when the denominator
        is zero (for example, when either mapping is empty).
    """
    # Only terms present in both vectors contribute to the dot product.
    shared_terms = vec1.keys() & vec2.keys()
    numerator = sum(vec1[term] * vec2[term] for term in shared_terms)

    sum1 = sum(weight ** 2 for weight in vec1.values())
    sum2 = sum(weight ** 2 for weight in vec2.values())

    # Take ONE square root of the product instead of multiplying two square
    # roots. sqrt(3) * sqrt(3) evaluates to 2.9999999999999996, which made
    # identical vectors score 1.0000000000000002 instead of exactly 1.0.
    # With integer counts the product sum1 * sum2 is computed exactly.
    denominator = math.sqrt(sum1 * sum2)
    if not denominator:
        return 0.0
    return numerator / denominator
def text_to_vector(text):
    """Build a bag-of-words vector for ``text``.

    Every match of the module-level ``WORD`` pattern is treated as one token;
    the returned ``Counter`` maps each token to the number of times it
    occurred. Tokens are taken from the text as given (no case folding).
    """
    # WORD has no capture groups, so group(0) is the whole matched token.
    tokens = (match.group(0) for match in WORD.finditer(text))
    return Counter(tokens)
# First comparison: read both texts, vectorise them, and score the pair.
print(" PLAGIARISM DETECTION I ")
text1 = input("Enter the first text: ")
text2 = input("Enter the second text to compare: ")
vector1, vector2 = text_to_vector(text1), text_to_vector(text2)
cosine1 = get_cosine(vector1, vector2)

# Second comparison: same steps for an independent pair of texts.
print(" PLAGIARISM DETECTION II ")
text3 = input("Enter the first text: ")
text4 = input("Enter the second text to compare: ")
vector3, vector4 = text_to_vector(text3), text_to_vector(text4)
cosine2 = get_cosine(vector3, vector4)

# Both scores are reported together, only after all four texts were entered.
for label, score in (
    ("The Cosine Similarity Ratio for 1st comparison is: ", cosine1),
    ("The Cosine Similarity Ratio for 2nd comparison is: ", cosine2),
):
    print(label, score)