-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathCodeToAST.py
More file actions
43 lines (35 loc) · 1.21 KB
/
Copy pathCodeToAST.py
File metadata and controls
43 lines (35 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import os
import sys
#
# Convert each individual code snippet into an AST.
# bcb_funcs_all.tsv : the file that holds the code fragments
#
class CodeToAST:
    """Parse the code fragments in ``bcb_funcs_all.tsv`` into javalang ASTs.

    The parsed table is cached as a pickle file under ``root`` so that
    later runs can load it instead of parsing every fragment again.
    """

    def __init__(self, root):
        """Remember the data directory and initialise empty result slots.

        Args:
            root: Directory that contains ``bcb_funcs_all.tsv`` and receives
                the pickle cache. A trailing path separator is optional.
        """
        self.root = root
        self.sources = None  # DataFrame produced by parse_source()
        # Not assigned anywhere else in the visible part of this class.
        self.size = None
        self.pairs = None

    def parse_source(self, output_file, option):
        """Return the table of parsed fragments, loading or building the cache.

        Args:
            output_file: File name of the pickle cache, relative to ``root``.
            option: When equal to ``'existing'`` and the cache file exists,
                the cache is loaded; in every other case the TSV is parsed
                again and the cache is (re)written.

        Returns:
            A pandas DataFrame with the columns ``id`` and ``code``. On the
            parse path ``code`` holds the tree returned by javalang for each
            fragment. The same object is stored in ``self.sources``.
        """
        # os.path.join instead of string concatenation: a root given without
        # a trailing separator ('data') must still resolve to 'data/ast.pkl'.
        path = os.path.join(self.root, output_file)
        if option == 'existing' and os.path.exists(path):
            # NOTE: unpickling executes arbitrary code; only load cache
            # files that this pipeline wrote itself.
            source = pd.read_pickle(path)
        else:
            # Imported lazily so that loading an existing cache does not
            # require javalang to be installed.
            import javalang

            def parse_program(func):
                # Tokenize one fragment and parse it as a member declaration.
                tokens = javalang.tokenizer.tokenize(func)
                parser = javalang.parser.Parser(tokens)
                return parser.parse_member_declaration()

            tsv_path = os.path.join(self.root, 'bcb_funcs_all.tsv')
            # The TSV has no header row: first column is the id, second the code.
            source = pd.read_csv(tsv_path, sep='\t', header=None, encoding='utf-8')
            source.columns = ['id', 'code']
            source['code'] = source['code'].apply(parse_program)
            source.to_pickle(path)
        self.sources = source
        return source

    def run(self):
        """Run the parse step, reusing ``ast.pkl`` under ``root`` if present."""
        print('parse source code...')
        self.parse_source(output_file='ast.pkl', option='existing')
# Script driver: build a converter rooted at 'data/' and run the parse step.
# NOTE: these statements sit at module level, so they also run on import.
code2ast = CodeToAST(root='data/')
code2ast.run()