-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
331 lines (292 loc) · 11.1 KB
/
utils.py
File metadata and controls
331 lines (292 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
def is_valid_network(G, T):
"""
Checks whether T is a valid network of G.
Args:
G: networkx.Graph
T: networkx.Graph
Returns:
bool: whether T is a valid network
"""
return nx.is_tree(T) and nx.is_dominating_set(G, T.nodes)
def average_pairwise_distance(T):
"""
Computes the average pairwise distance between vertices in T.
This is what we want to minimize!
Note that this function is a little naive, i.e. there are much
faster ways to compute the average pairwise distance in a tree.
Feel free to write your own!
Args:
T: networkx.Graph, a tree
Returns:
double: the average pairwise distance
"""
if not nx.is_connected(T):
raise ValueError("Tree must be connected")
if len(T) == 1: return 0
path_lengths = nx.all_pairs_dijkstra_path_length(T)
total_pairwise_distance = sum([sum(length[1].values()) for length in path_lengths])
return total_pairwise_distance / (len(T) * (len(T) - 1))
def average_pairwise_distance_fast(T):
"""Calculates the average pairwise distance for a tree in linear time.
Since there is always unique path between nodes in a tree, each edge in the
tree is used in all of the paths from the connected component on one side
of the tree to the other. So each edge contributes to the total pairwise cost
in the following way: if the size of the connected components that are
created from removing an edge e are A and B, then the total pairwise distance
cost for an edge is 2 * A * B * w(e) = (# of paths that use that edge) * w(e).
We multiply by two to consider both directions that paths can take on an
undirected edge.
Since each edge connects a subtree to the rest of a tree, we can run DFS
to compute the sizes of all of the subtrees, and iterate through all the edges
and sum the pairwise distance costs for each edge and divide by the total
number of pairs.
This is very similar to Q7 on MT1.
h/t to Noah Kingdon for the algorithm.
"""
if not nx.is_connected(T):
raise ValueError("Tree must be connected")
if len(T) == 1: return 0
subtree_sizes = {}
marked = defaultdict(bool)
# store child parent relationships for each edge, because the components
# created when removing an edge are the child subtree and the rest of the vertices
root = list(T.nodes)[0];
child_parent_pairs = [(root, root)]
def calculate_subtree_sizes(u):
"""Iterates through the tree to compute all subtree sizes in linear time
Args:
u: the root of the subtree to start the DFS
"""
unmarked_neighbors = filter(lambda v: not marked[v], T.neighbors(u))
marked[u] = True
size = 0
for v in unmarked_neighbors:
child_parent_pairs.append((v, u))
calculate_subtree_sizes(v)
size += subtree_sizes[v]
subtree_sizes[u] = size + 1
return subtree_sizes[u]
calculate_subtree_sizes(root) # any vertex can be the root of a tree
cost = 0
for c, p in child_parent_pairs:
if c != p:
a, b = subtree_sizes[c], len(T.nodes) - subtree_sizes[c]
w = T[c][p]["weight"]
cost += 2 * a * b * w
return cost / (len(T) * (len(T) - 1))
def draw(G):
plt.subplot(121)
nx.draw(G, with_labels=True, font_weight='bold')
plt.subplot(122)
nx.draw_shell(G, with_labels=True, font_weight='bold')
plt.show()
def draw2(G, T):
plt.subplot(221)
nx.draw(G, with_labels=True, font_weight='bold')
plt.subplot(222)
nx.draw_shell(G, with_labels=True, font_weight='bold')
plt.subplot(223)
nx.draw(T, with_labels=True, font_weight='bold')
plt.subplot(224)
nx.draw_shell(T, with_labels=True, font_weight='bold')
plt.show()
def mwd(G, weight='weight'):
"""This is source code of network.(min_weighted_dominating_set)
"""
dom_set = set([])
cost_func = dict((node, nd.get(weight, 1)) for node, nd in G.nodes(data=True))
vertices = set(G)
sets = dict((node, set([node]) | set(G[node])) for node in G)
def _cost(subset):
"""Our cost effectiveness function for sets given its weight."""
cost = sum(cost_func[node] for node in subset)
return cost / float(len(subset - dom_set))
while vertices:
# find the most cost effective set, and the vertex that for that set
dom_node, min_set = min(sets.items(),
key=lambda x: (x[0], _cost(x[1])))
alpha = _cost(min_set)
# reduce the cost for the rest
for node in min_set - dom_set:
cost_func[node] = alpha
# add the node to the dominating set and reduce what we must cover
dom_set.add(dom_node)
del sets[dom_node]
vertices = vertices - min_set
return dom_set
def getComponents(G, needConnect):
"""
G = nx.Graph()
G.add_nodes_from([1,2,3,4,5,6])
G.add_edges_from([(1,2),(2,3),(3,4),(3,5)])
need = set()
tree = set()
needConnect = {1,2,3,4,5,6}
input: G, needConnect
return: [[6], [1, 2, 3, 4, 5]]
"""
start = sorted(list(needConnect), key=lambda x: len(G[x]), reverse=True)[0]
components = []
while needConnect:
needConnect.discard(start)
part = [start]
queue = [start]
while queue:
start = queue.pop()
for e in set(G[start]):
if e in needConnect:
needConnect.discard(e)
part.append(e)
queue.append(e)
components.append(part)
if len(needConnect) == 0:
break
start = sorted(list(needConnect), key=lambda x: len(G[x]), reverse=True)[0]
return components
def findAllPath(G, components):
pathDic = {}
for i in range(len(components)):
for j in range(i+1, len(components)):
A, B = components[i], components[j]
for x in range(len(A)):
for y in range(len(B)):
if A[x] != B[y]:
pathDic[(A[x], B[y])] = nx.shortest_path(G, A[x], B[y])
return pathDic
def sortPathHelper(x):
try:
path = pathDic[x]
return sum([G[path[i]][path[i+1]]['weight'] for i in range(0, len(path)-1)])
except:
return float('inf')
def common_member(a, b):
return len(set(a).intersection(set(b) )) > 0
def connectComponents(G, components):
components = sorted(components, key=lambda x: len(x), reverse=True)
pathDic = findAllPath(G, components)
while len(components) > 1:
path = sorted(pathDic, key=lambda x: sortPathHelper(x))[0]
first = second = None
start = path[0]
end = path[1]
for i in components:
if start in i:
first = i
elif end in i:
second = i
if first != None and second != None:
break
newComponent = set()
components.remove(first)
for i in first:
newComponent.add(i)
components.remove(second)
for i in second:
newComponent.add(i)
for i in pathDic[path]:
newComponent.add(i)
if len(components) == 0:
return newComponent
components.insert(0, list(newComponent))
unique = True
while unique:
unique = False
for i in range(len(components)):
for j in range(i+1, len(components)):
if components[i] != components[j] and common_member(components[i], components[j]):
unique = True
a = components[i]
b = components[j]
components.remove(a)
components.remove(b)
components.append(list(set(a) | set(b)))
break
pathDic = findAllPath(G, components)
return components[0]
def buildTree(G, nodes):
newG = nx.Graph()
newG.add_nodes_from(nodes)
for i in sorted(G.edges, key=lambda x: G[x[0]][x[1]]['weight']):
if i[0] in nodes and i[1] in nodes:
newG.add_edge(i[0], i[1], weight=G[i[0]][i[1]]['weight'])
newG = nx.minimum_spanning_tree(newG, weight='weight', algorithm='kruskal')
for i in sorted(list(newG.nodes), key=lambda x: len(G[x])):
if len(G[i]) == 1:
newG.remove_node(i)
if len(G[i]) > 1:
break
return newG
def oneNode(G, nodes):
onlyone = nx.Graph()
onlyone.add_node(nodes[0])
return onlyone
def addNodes(G, tree, rest):
cur = float('inf')
if is_valid_network(G, tree):
cur = average_pairwise_distance(tree)
node = sorted(list(tree.nodes), key=lambda x: len(G[x]), reverse=True)
for i in sorted(list(rest), key=lambda x: len(G[x]), reverse=True):
tree.add_node(i)
for n in G[i]:
if n in node:
tree.add_edge(i, n)
tree[i][n]['weight'] = G[i][n]['weight']
tree = nx.minimum_spanning_tree(tree, weight='weight')
if is_valid_network(G, tree):
newdis = average_pairwise_distance(tree)
if newdis > cur:
tree.remove_node(i)
else:
cur = min(cur, newdis)
else:
tree.remove_node(i)
return tree
def removeNodes(G, tree):
nodes = sorted(list(tree.nodes), key=lambda x: len(G[x]), reverse=True)
cur = float('inf')
if is_valid_network(G, tree):
cur = average_pairwise_distance(tree)
li = list(nodes)
for i in nodes:
li.remove(i)
temp = buildTree(G.copy(), li)
if len(temp.nodes) > 0 and is_valid_network(G, temp) and average_pairwise_distance(temp) < cur:
cur = average_pairwise_distance(temp)
else:
li.append(i)
return buildTree(G, li)
def build(G, nodes):
newG = nx.Graph()
nodes = sorted(list(nodes), key=lambda x: len(G[x]))
if nodes:
newG.add_node(nodes.pop(0))
if nodes:
newG.add_node(nodes.pop(0))
cur = float('inf')
if is_valid_network(G, newG):
cur = average_pairwise_distance(newG)
while nodes or is_valid_network(G, newG):
for i in sorted(G.edges, key=lambda x: G[x[0]][x[1]]['weight']):
if i[0] in nodes and i[1] in nodes:
newG.add_edge(i[0], i[1], weight=G[i[0]][i[1]]['weight'])
temp = nx.minimum_spanning_tree(newG, weight='weight', algorithm='kruskal')
if is_valid_network(G, temp):
thisValue = average_pairwise_distance(newG)
if thisValue < cur:
cur = thisValue
newG = temp
if nodes:
newG.add_node(nodes.pop(0))
for i in sorted(G.edges, key=lambda x: G[x[0]][x[1]]['weight']):
if i[0] in nodes and i[1] in nodes:
newG.add_edge(i[0], i[1], weight=G[i[0]][i[1]]['weight'])
newG = nx.minimum_spanning_tree(newG, weight='weight', algorithm='kruskal')
for i in sorted(list(newG.nodes), key=lambda x: len(G[x])):
if len(G[i]) == 1:
newG.remove_node(i)
if len(G[i]) > 1:
break
return newG