-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtree.py
More file actions
87 lines (66 loc) · 2.62 KB
/
tree.py
File metadata and controls
87 lines (66 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""
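Author: Amir Aizin

This script splits every document in DataSet/Sections into one file per
numbered section, collects the first line of each file in that directory,
and renders those lines as a chained Graphviz diagram under Tree.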
"""
import shutil
import graphviz
import os
import re
# Directory that readData() splits in place and that the other functions list.
pathsections = "DataSet/Sections"
# Output directory handed to graphviz when the diagram is rendered and viewed.
tree_path = "Tree"
# Not read by the functions visible here; readData() builds its own local `text`.
text = []
# Shared list of first lines: filled by splited_files_into_list() when the
# __main__ block passes it in, then read by graph_viz().
save_file_name = []
# Only referenced from commented-out code in graph_viz().
list_to_str = ""
def readData(sections_dir=None):
    """Split every file in the sections directory into one file per section.

    A section starts at any line that begins with an optional ``Section ``
    prefix followed by digits. Each section is written next to its source
    file as ``<stem><k>.txt`` (k counts from 1), where ``<stem>`` is the
    source file name up to its first dot. Text before the first section
    heading is not written anywhere, and a file with no heading produces
    no output.

    Args:
        sections_dir: Directory to process. Defaults to the module-level
            ``pathsections`` when omitted.

    Note:
        Output files land in the same directory that is being read, so
        running the function a second time will split the files it
        produced the first time as well.
    """
    if sections_dir is None:
        sections_dir = pathsections

    # Only the start offset of each match is used below, so the trailing
    # character class has no influence on where the text is cut.
    doc_splitter = re.compile(r"^(?:Section\ )?\d+[\.\d+]?", re.MULTILINE)

    # os.listdir returns a snapshot, so files created in the loop are not
    # picked up during this run.
    for file in os.listdir(sections_dir):
        full_path = os.path.join(sections_dir, file)
        if not os.path.isfile(full_path):
            continue

        with open(full_path, "r", encoding="utf-8") as source:
            content = source.read()

        # Section boundaries: every heading offset plus the end of the text.
        starts = [match.start() for match in doc_splitter.finditer(content)]
        starts.append(len(content))

        stem = file.split(sep='.')[0]
        for number, (begin, end) in enumerate(zip(starts, starts[1:]), start=1):
            out_path = os.path.join(sections_dir, stem + str(number) + ".txt")
            # A context manager closes the output even if the write fails.
            with open(out_path, "w", encoding='utf-8') as target:
                target.write(content[begin:end] + "\n")
def splited_files_into_list(save_file_name=None, sections_dir=None):
    """Collect the first line of every file in the sections directory.

    Each first line is appended exactly as ``readline()`` returns it
    (including its trailing newline, if any), and the list is then sorted
    in place. The ordering is plain string ordering, so a line starting
    with "10" sorts before one starting with "2".

    Args:
        save_file_name: List to append to. A new list is created when
            omitted, instead of failing on ``None.append``.
        sections_dir: Directory to scan. Defaults to the module-level
            ``pathsections`` when omitted.

    Returns:
        The same list object that was filled and sorted.
    """
    if save_file_name is None:
        save_file_name = []
    if sections_dir is None:
        sections_dir = pathsections

    for file in os.listdir(sections_dir):
        full_path = os.path.join(sections_dir, file)
        if os.path.isfile(full_path):
            with open(full_path, "r", encoding="utf-8") as f:
                save_file_name.append(f.readline())

    save_file_name.sort()
    return save_file_name
def graph_viz():
    """Render the collected first lines as a chain of nodes and open the result.

    Reads the module-level ``save_file_name`` list. Every entry becomes a
    node, and each entry is linked to the one that follows it in the list.
    The rendered graph is written under ``tree_path`` and opened with
    ``Digraph.view``.

    The chain is built once from the list. Rebuilding it for every entry of
    the sections directory would only repeat the same node and edge
    statements, and would fail when an entry is a sub-directory.
    """
    G = graphviz.Digraph(name="Article Summarizer", node_attr={'shape': 'tab', 'fixedsize': 'False'})

    # Declare every node explicitly so a single-entry list still draws a node.
    for name in save_file_name:
        G.node(name)

    # NOTE(review): graphviz splits edge endpoint names on ':' into
    # node/port/compass - confirm the collected lines never contain a colon.
    for tail, head in zip(save_file_name, save_file_name[1:]):
        G.edge(tail, head, constraint='true')

    G.view(directory=tree_path)
if __name__ == "__main__":
    # Pipeline: split the documents into section files, collect the first
    # line of every file into the shared module-level list, then draw them.
    readData()
    splited_files_into_list(save_file_name)
    graph_viz()