-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmain.py
More file actions
150 lines (132 loc) · 4.93 KB
/
main.py
File metadata and controls
150 lines (132 loc) · 4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
####################################
# Author: Jeremy (Meng-Chieh) Lee #
# Email : mengchil@cs.cmu.edu #
####################################
import os
import sys
import copy
import progressbar
import time
import numpy as np
import collections
from termcolor import colored
from config import parser
from gspan import *
from utils import *
from graph import AUTO_EDGE_ID
from graph import Graph, instance_filter_mis
from graph import VACANT_GRAPH_ID
from graph import VACANT_VERTEX_LABEL
def main(FLAGS=None):
"""Run GRANDDIWE"""
if FLAGS is None:
FLAGS, _ = parser.parse_known_args(args=sys.argv[1:])
graphs = read_graphs(FLAGS.input_file)
graph_ori_mdl, a_score, g_mdl_bits, m_mdl_bits, w_mdl_bits = [], [], [], [], []
for idx, _ in enumerate(graphs):
graph_ori_mdl.append(graphs[idx].calculate_mdl())
a_score.append([])
total_bits = [sum(graph_ori_mdl)]
iter, tttime = 0, 0
min_s = FLAGS.min_support
while True:
print('\n----------------------------------------------------------------------\n')
print(colored('Iteration ' + str(iter) + '\n', 'magenta'))
print(colored('Finding Frequent Substructures...', 'blue'))
start1 = time.time()
gs = gSpan(
graphs=copy.deepcopy(graphs),
iteration = iter,
min_support=min_s,
min_num_vertices=FLAGS.lower_bound_of_num_vertices,
max_num_vertices=FLAGS.upper_bound_of_num_vertices,
verbose=FLAGS.verbose,
visualize=FLAGS.plot,
where=FLAGS.where
)
freq_subs = gs.run()
freq_subs = eliminate_incorrect_pattern(copy.deepcopy(freq_subs), min_s)
freq_subs = find_directed_frequent_subgraphs(freq_subs, min_s)
freq_subs = instance_filter_mis(freq_subs, min_s)
freq_subs = self_loop_detect(freq_subs, graphs, min_s)
end1 = time.time()
print(colored('Finishedd in %f Seconds!\n' % (end1 - start1), 'green'))
print(colored('Optimizing Edge Weight...', 'blue'))
start2 = time.time()
for idx, fs in enumerate(progressbar.progressbar(freq_subs)):
fs.update_edge_weight(graphs)
fs.update_gdict()
end2 = time.time()
print(colored('Finished in %f Seconds!\n' % (end2 - start2), 'green'))
print(colored('Evaluting Substructures...', 'blue'))
start3 = time.time()
max_score = 0
max_score_idx = -1
max_arr, max_mdl = [], dict()
for idx, fs in enumerate(progressbar.progressbar(freq_subs)):
score, tmp_arr, tmp_mdl = 0, [], dict()
fs_gid = [gi.gid for gi in fs.instances]
for gid in fs_gid:
g = graphs[gid]
g_new_mdl = g.calculate_compressed_mdl(fs)
g_old_mdl = g.calculate_mdl()
# if MDL doesn't become smaller, then don't compress it (even if it contains pattern)
if g_old_mdl - g_new_mdl > 0:
score += g_old_mdl - g_new_mdl
tmp_arr.append(gid)
tmp_mdl[gid] = (g_old_mdl, g_new_mdl)
f_mdl = fs.definition.calculate_mdl()
if max(0, score - f_mdl) > max_score and len(tmp_arr) > 0:
max_score = score - f_mdl
max_score_idx = idx
max_arr = copy.deepcopy(tmp_arr)
max_mdl = copy.deepcopy(tmp_mdl)
end3 = time.time()
print(colored('Finished in %f Seconds!\n' % (end3 - start3), 'green'))
if max_score_idx == -1:
min_s = int(min_s * 0.9)
if min_s <= 30:
ttime = (end1 - start1) + (end2 - start2) + (end3 - start3)
tttime += ttime
print(colored('No More Pattern is Found!', 'green'))
print(colored(str(iter) + ' Iterations Finished in %f Seconds!\n' % (tttime), 'green'))
break
else:
print(colored('Decrease MinSup', 'red'))
continue
freq_subs[max_score_idx].display()
if FLAGS.plot:
draw_graph(g=freq_subs[max_score_idx].definition,
path=FLAGS.where,
label='Pattern ' + str(iter) + '\n# of Instances: ' + str(len(fs.instances)))
print(colored('Compressing Graphs...', 'blue'))
start4 = time.time()
ins_count = 0
total_bits.append(total_bits[-1])
for idx, _ in enumerate(progressbar.progressbar(graphs)):
if idx in max_arr:
g_old_mdl, g_new_mdl = max_mdl[idx]
a_score[idx].append((g_old_mdl - g_new_mdl) / graph_ori_mdl[idx])
ins_num, ins_count = graphs[idx].compress(freq_subs[max_score_idx], iter, ins_count)
total_bits[iter + 1] -= g_old_mdl - g_new_mdl
else:
a_score[idx].append(0)
end4 = time.time()
print(colored('Finished in %f Seconds!\n' % (end4 - start4), 'green'))
ttime = (end1 - start1) + (end2 - start2) + (end3 - start3) + (end4 - start4)
tttime += ttime
print(colored('Iteration ' + str(iter) + ' Finished in %f Seconds\n' % ttime, 'magenta'))
iter += 1
if iter == FLAGS.iterations:
print(colored(str(FLAGS.iterations) + ' Iterations Finished in %f Seconds!\n' % (tttime), 'green'))
break
with open(FLAGS.output_file, 'w') as f:
f.write('MinSup ' + str(min_s) + ', Bits ' + str(total_bits[-1]) + '\n')
for gid, i in enumerate(a_score):
tmp = 1
for idx, j in enumerate(i):
tmp -= (1 / iter) * (iter - idx) * j
f.write(str(round(tmp, 5)))
f.write('\n')
if __name__ == '__main__':
main()