-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript-zero.py
More file actions
83 lines (65 loc) · 2.03 KB
/
Copy pathscript-zero.py
File metadata and controls
83 lines (65 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import sqlite3
import time

import pandas as pd
from openai import OpenAI

from statuti_utils import *
# === CONFIG ===
DB_PATH = "test-data-zero-fix.db"        # SQLite file opened with sqlite3.connect()
TSV_PATH = "ternary_task_test_data.tsv"  # tab-separated input loaded when the table is empty
PROMPT_FILE = "prompt-zero-sl.txt"       # prompt template containing "{test_sentence}"
STOP_AFTER = 1000                        # max rows handled per run; falsy value = no limit
MODEL_NAME = "gpt-4o-mini"               # model name handed to get_chatgpt_answer()
# Read the key from the environment instead of hard-coding it in the source.
# Falls back to the previous empty string when OPENAI_API_KEY is not set.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
PAUSE = 1                                # seconds to sleep between API calls
TABLE_NAME = "records"                   # table that stores inputs and model answers

# Load the prompt template once; each record's text is substituted into it later.
with open(PROMPT_FILE, "r", encoding="utf-8") as f:
    prompt_text = f.read()
# === INIT ===
# One API client and one SQLite connection/cursor are shared by the whole script.
client = OpenAI(api_key=API_KEY)
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()

# === CREATE THE TABLE IF IT DOES NOT EXIST ===
# chatgpt_answer starts out NULL and is filled in by the processing loop.
_create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
id INTEGER PRIMARY KEY AUTOINCREMENT,
text TEXT,
label TEXT,
uni TEXT,
chatgpt_answer TEXT
)
"""
cur.execute(_create_table_sql)
conn.commit()
# === IMPORT THE TSV ONLY WHEN THE DATABASE IS EMPTY ===
# An existing row count > 0 means a previous run already loaded the data.
cur.execute(f"SELECT COUNT(*) FROM {TABLE_NAME}")
(count,) = cur.fetchone()
if count != 0:
    print("✅ Database already contains data; skipping TSV import.")
else:
    print("📥 Importing data from TSV...")
    df = pd.read_csv(TSV_PATH, sep="\t")
    # Drop every column whose name starts with "Unnamed" before inserting
    # (presumably index/blank-header columns produced when the TSV was saved).
    unnamed_mask = df.columns.str.contains('^Unnamed')
    df = df.loc[:, ~unnamed_mask]
    df.to_sql(TABLE_NAME, conn, if_exists="append", index=False)
    print(f"✅ Imported {len(df)} records.")
# === PROCESS ONLY THE ROWS THAT HAVE NO ANSWER YET ===
# STOP_AFTER caps how many rows a single run handles; a falsy value removes the cap.
limit_clause = f"LIMIT {STOP_AFTER}" if STOP_AFTER else ""
try:
    cur.execute(f"SELECT id, text, label FROM {TABLE_NAME} WHERE chatgpt_answer IS NULL {limit_clause}")
    rows = cur.fetchall()
    print(f"🔍 Found {len(rows)} records to process.")

    for rec_id, text, _label in rows:
        print(f"\n🧠 Processing ID {rec_id}...")

        # A NULL text column comes back as None, and str.replace() would raise
        # TypeError on it; skip the row instead of aborting the whole run.
        if text is None:
            print("❌ No text, skipping.")
            continue

        prompt_instance = prompt_text.replace("{test_sentence}", text)
        answer = get_chatgpt_answer(prompt_instance, client, MODEL_NAME)

        if answer:
            cur.execute(f"""
UPDATE {TABLE_NAME}
SET chatgpt_answer = ?
WHERE id = ?
""", (answer, rec_id))
            # Commit per row so finished work survives an interruption.
            conn.commit()
            print(f"✅ Updated ID {rec_id}")
        else:
            print("❌ No response, skipping.")

        # Pause after every API call, including failed ones, so a run of
        # empty responses does not turn into back-to-back requests.
        time.sleep(PAUSE)  # small delay to avoid rate limits

    print("\n🏁 All done!")
finally:
    # Release the database handle even when the loop is cut short by an error
    # or a keyboard interrupt.
    conn.close()