-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathfeature_extractor_cli.py
More file actions
69 lines (56 loc) · 2.98 KB
/
feature_extractor_cli.py
File metadata and controls
69 lines (56 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# feature_cli.py
import argparse
import json
from pathlib import Path
import sys
from feature_extractor_v4 import process_poc, process_poc_folder
import dynamic_feature_extractor_v2 as dynamic_extractor
def main():
parser = argparse.ArgumentParser(description="JavaScript PoC Feature Extractor CLI")
subparsers = parser.add_subparsers(dest="command", required=True)
# --- Command: folder ---
folder_parser = subparsers.add_parser("folder", help="Extract features from all JS files in a folder")
folder_parser.add_argument("-i", "--input-folder", required=True, help="Folder containing JS files")
folder_parser.add_argument("-o", "--output-csv", required=True, help="CSV output file")
folder_parser.add_argument("--crash", type=int, choices=[0, 1], default=0, help="Crash label (0 or 1)")
# --- Command: file ---
file_parser = subparsers.add_parser("file", help="Extract features from a single JS file")
file_parser.add_argument("-i", "--input", required=True, help="Path to JS PoC file")
file_parser.add_argument("--format", choices=["dict", "string", "json"], default="string", help="Output format")
file_parser.add_argument("--flags", type=str, help="Optional flags as a string")
file_parser.add_argument("--static", type=str, help="1 is static only, 0 is dynamic only, omit for both")
args = parser.parse_args()
if args.command == "folder":
process_poc_folder(args.input_folder, args.output_csv, args.crash)
dynamic_extractor.process_poc_folder(
args.input_folder, args.output_csv, args.crash)
elif args.command == "file":
flags = args.flags.split() if args.flags else []
merged_feats = {}
if args.static == "1":
feats_static = process_poc(args.input, return_format="dict")
merged_feats = feats_static
elif args.static == "0":
feats_dynamic = dynamic_extractor.process_poc(args.input, flags, return_format="dict")
feats_dynamic.pop("filename", None)
merged_feats = feats_dynamic
else:
feats_static = process_poc(args.input, return_format="dict")
feats_dynamic = dynamic_extractor.process_poc(args.input, flags, return_format="dict")
feats_dynamic.pop("filename", None)
merged_feats = {**feats_static, **feats_dynamic}
if args.format == "dict":
print(json.dumps(merged_feats, indent=2, ensure_ascii=False))
elif args.format == "json":
dump = json.dumps(merged_feats, separators=(",", ":"))
tmp_path = Path("/tmp/feature_dump.json")
tmp_path.write_text(dump, encoding="utf-8")
else: # string format
# Flatten to comma-separated format
feat_str = ",".join(f"{k}={v}" for k, v in merged_feats.items())
print(feat_str)
else:
parser.print_help()
sys.exit(1)
if __name__ == "__main__":
main()