-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgemini_classifier.py
More file actions
82 lines (70 loc) · 3.22 KB
/
gemini_classifier.py
File metadata and controls
82 lines (70 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import json
import google.generativeai as genai
from dotenv import load_dotenv
# Load key/value pairs from a local .env file into the process environment at
# import time, so that GEMINI_API_KEY can be read with os.getenv() below.
load_dotenv()
class GeminiClassifier:
    """Zero-shot scam classifier for Hinglish WhatsApp/SMS text, backed by Gemini.

    The model is asked to answer in JSON; the reply is parsed and normalized so
    that callers always receive a dict with the keys ``fraud_score`` (int,
    0-100), ``scam_category`` (str), ``red_flags`` (list of str) and
    ``summary`` (str).
    """

    # Model used when the caller does not pick one explicitly.
    DEFAULT_MODEL_NAME = "gemini-1.5-flash"

    # Inputs with fewer meaningful (non-whitespace-trimmed) characters than
    # this are rejected locally without spending an API call.
    MIN_TEXT_LENGTH = 5

    # Prompt sent to the model. ``{text}`` is replaced via ``str.format``;
    # literal braces of the JSON schema are escaped as ``{{`` / ``}}``.
    # NOTE(security): the analyzed message is untrusted and is embedded
    # verbatim, so it can attempt prompt injection. The reply is therefore
    # never trusted as-is; see ``_normalize_result``.
    _PROMPT_TEMPLATE = """
You are an expert cybersecurity analyst at 'RealityGuard India', specialized in analyzing WhatsApp and SMS messages for scams targeting Indian citizens.
Your task is to analyze the following Hinglish (Hindi + English) text extracted from a message.
SCAM CRITERIA TO LOOK FOR (Common in India):
1. **Urgency/Fear:** Threats of electricity disconnection, bank account block, SBI/HDFC KYC updates, police arrest.
2. **Greed/Lottery:** KBC lottery win, Jio tower installation, work-from-home YouTube likes jobs, free cryptocurrency.
3. **Suspicious Links:** Bit.ly links, .apk downloads, unofficial portals mimicking Indian banks or gov.
4. **Payment Requests:** Unknown UPI VPAs (e.g., @ybl, @okicici), requesting ₹10 or ₹5000 as "processing fee".
Analyze the text and return ONLY a valid JSON object with the following schema:
{{
"fraud_score": <integer from 0 to 100, where 100 is definitely a scam>,
"scam_category": "<string, e.g., 'Phishing', 'Lottery Scam', 'Electricity Bill Scam', 'Job Fraud', or 'Safe'>",
"red_flags": [
"<string: explicitly list the problematic parts of the text, e.g., 'Mentions urgency for KYC', 'Contains unofficial APK link'>"
],
"summary": "<string: A 1-2 sentence Hinglish or English explanation for the user on what this message is and what they should do.>"
}}
TEXT TO ANALYZE:
"{text}"
"""

    def __init__(self, api_key=None, model_name=None):
        """Configure the Gemini SDK and build the generative model.

        Args:
            api_key: Gemini API key. When omitted, the ``GEMINI_API_KEY``
                environment variable is used.
            model_name: Gemini model identifier. When omitted,
                ``DEFAULT_MODEL_NAME`` is used.

        Raises:
            ValueError: If no API key is passed and ``GEMINI_API_KEY`` is unset
                or empty.
        """
        api_key = api_key or os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in .env file")
        genai.configure(api_key=api_key)

        # Ask for a JSON reply; a low temperature keeps the classification
        # close to deterministic.
        self.generation_config = {
            "temperature": 0.1,
            "response_mime_type": "application/json",
        }
        self.model = genai.GenerativeModel(
            model_name=model_name or self.DEFAULT_MODEL_NAME,
            generation_config=self.generation_config,
        )

    @staticmethod
    def _fallback(category, summary):
        """Build a zero-score result used when no real analysis is available."""
        return {
            "fraud_score": 0,
            "scam_category": category,
            "red_flags": [],
            "summary": summary,
        }

    @staticmethod
    def _normalize_result(raw):
        """Coerce a decoded model reply into the documented result shape.

        Extra keys supplied by the model are kept. ``fraud_score`` is coerced
        to an int and clamped to 0-100 (0 when it cannot be interpreted),
        ``red_flags`` becomes a list of strings, and ``scam_category`` /
        ``summary`` become strings.

        Raises:
            ValueError: If ``raw`` is not a JSON object (dict).
        """
        if not isinstance(raw, dict):
            raise ValueError("Model response is not a JSON object")

        try:
            score = round(float(raw.get("fraud_score", 0)))
        except (TypeError, ValueError, OverflowError):
            # Missing, non-numeric, NaN or infinite score.
            score = 0
        score = max(0, min(100, score))

        flags = raw.get("red_flags")
        if isinstance(flags, str):
            # The model sometimes returns a single flag as a bare string.
            flags = [flags] if flags else []
        elif isinstance(flags, (list, tuple)):
            flags = [str(flag) for flag in flags]
        else:
            flags = []

        result = dict(raw)
        result["fraud_score"] = score
        result["scam_category"] = str(raw.get("scam_category") or "Unknown")
        result["red_flags"] = flags
        result["summary"] = str(raw.get("summary") or "")
        return result

    def analyze_text(self, text):
        """Classify a message and return a structured fraud assessment.

        Args:
            text: Message text to analyze (typically Hinglish extracted from a
                screenshot).

        Returns:
            A dict with ``fraud_score``, ``scam_category``, ``red_flags`` and
            ``summary``. Input that is not a string, or has fewer than
            ``MIN_TEXT_LENGTH`` characters after trimming whitespace, yields
            the "Unclear / Too Short" result without calling the API. Any
            failure while calling the model or decoding its reply yields the
            "Analysis Failed" result; this method does not raise for those.
        """
        cleaned = text.strip() if isinstance(text, str) else ""
        if len(cleaned) < self.MIN_TEXT_LENGTH:
            return self._fallback(
                "Unclear / Too Short",
                "The provided text is too short or empty. Provide a clear screenshot.",
            )

        prompt = self._PROMPT_TEMPLATE.format(text=cleaned)
        try:
            response = self.model.generate_content(prompt)
            return self._normalize_result(json.loads(response.text))
        except Exception as e:
            # Deliberate catch-all at the API boundary: SDK/network errors,
            # blocked responses and malformed JSON all degrade to a fallback
            # result instead of crashing the caller.
            return self._fallback(
                "Analysis Failed",
                f"Failed to analyze text using Gemini API. Error: {str(e)}",
            )
if __name__ == "__main__":
    # Manual smoke check: constructing the classifier raises ValueError when
    # GEMINI_API_KEY is missing, so reaching the print means setup succeeded.
    classifier = GeminiClassifier()
    print("Gemini Classifier initialized.")