-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathLink Spam.py
More file actions
75 lines (61 loc) · 2.66 KB
/
Link Spam.py
File metadata and controls
75 lines (61 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Two Gold Stars
# Question 2: Combatting Link Spam
# One of the problems with our page ranking system is pages can
# collude with each other to improve their page ranks. We consider
# A->B a reciprocal link if there is a link path from B to A of length
# equal to or below the collusion level, k. The length of a link path
# is the number of links which are taken to travel from one page to the
# other.
# If k = 0, then a link from A to A is a reciprocal link for node A,
# since no links needs to be taken to get from A to A.
# If k=1, B->A would count as a reciprocal link if there is a link
# A->B, which includes one link and so is of length 1. (it requires
# two parties, A and B, to collude to increase each others page rank).
# If k=2, B->A would count as a reciprocal link for node A if there is
# a path A->C->B, for some page C, (link path of length 2),
# or a direct link A-> B (link path of length 1).
# Modify the compute_ranks code to
# - take an extra input k, which is a non-negative integer, and
# - exclude reciprocal links of length up to and including k from
# helping the page rank.
def reciprocal(graph, page, node, k):
if k == 0:
if node == page:
return True
else:
return False
for link in graph[page]:
if reciprocal(graph, link, node, k-1):
return True
return False
def compute_ranks(graph, k):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph: # initialize all the ranks to (1/number of pages)
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
for node in graph:
if page in graph[node] and not reciprocal(graph, page, node, k):
newrank = newrank + d * (ranks[node]/len(graph[node]))
newranks[page] = newrank
ranks = newranks
return ranks
# For example
g = {'a': ['a', 'b', 'c'], 'b':['a'], 'c':['d'], 'd':['a']}
print ( compute_ranks(g, 0) ) # the a->a link is reciprocal
#>>> {'a': 0.26676872354238684, 'c': 0.1216391112164609,
# 'b': 0.1216391112164609, 'd': 0.1476647842238683}
print ( compute_ranks(g, 1) ) # a->a, a->b, b->a links are reciprocal
#>>> {'a': 0.14761759762962962, 'c': 0.08936469270123457,
# 'b': 0.04999999999999999, 'd': 0.12202199703703702}
print ( compute_ranks(g, 2) )
# a->a, a->b, b->a, a->c, c->d, d->a links are reciprocal
# (so all pages end up with the same rank)
#>>> {'a': 0.04999999999999999, 'c': 0.04999999999999999,
# 'b': 0.04999999999999999, 'd': 0.04999999999999999}
print(compute_ranks(g, 3))