# Student-Placement-Prediction/predict.py (main branch, XC0ID/Student-Placement-Prediction on GitHub)

import joblib
import pandas as pd
from src.config import MODEL_DIR, ALL_FEATURES, BEST_MODEL_NAME

def predict_placement(input_data: dict) -> dict:
    """Predict the placement outcome for a single student record.

    Loads the pipeline stored as ``{BEST_MODEL_NAME}_pipeline.pkl`` inside
    ``MODEL_DIR``. If that file does not exist, the alphabetically first
    ``*_pipeline.pkl`` in ``MODEL_DIR`` is used instead and its file name is
    printed.

    Args:
        input_data: Mapping of feature name to value for one student. Every
            name in ``ALL_FEATURES`` must be present; keys that are not in
            ``ALL_FEATURES`` are ignored.

    Returns:
        A dict with three keys:

        * ``will_be_placed`` -- ``bool()`` of the label returned by
          ``model.predict``.
        * ``placement_probability`` -- second column of
          ``model.predict_proba``, rounded to 4 decimals (presumably the
          "placed" class; confirm against the labels used in training).
        * ``confidence`` -- ``"High"`` when the unrounded probability is
          above 0.7, ``"Medium"`` when above 0.5, otherwise ``"Low"``.

    Raises:
        FileNotFoundError: If ``MODEL_DIR`` contains no ``*_pipeline.pkl``.
        KeyError: If ``input_data`` lacks any name listed in ``ALL_FEATURES``.
    """
    model_path = MODEL_DIR / f"{BEST_MODEL_NAME}_pipeline.pkl"
    if not model_path.exists():
        candidates = sorted(MODEL_DIR.glob("*_pipeline.pkl"))
        if not candidates:
            raise FileNotFoundError(f"No trained model found in {MODEL_DIR}. Run 'main.py' first!")
        model_path = candidates[0]
        print(f"Using available model: {model_path.name}")

    # Validate before the (comparatively expensive) model load, and name the
    # missing features explicitly instead of surfacing pandas' generic
    # "not in index" KeyError from the column selection below.
    missing = [name for name in ALL_FEATURES if name not in input_data]
    if missing:
        raise KeyError(f"input_data is missing required feature(s): {missing}")

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point MODEL_DIR at model files you trust.
    model = joblib.load(model_path)

    # Build the one-row feature frame once and reuse it for both calls.
    features = pd.DataFrame([input_data])[ALL_FEATURES]
    prediction = model.predict(features)[0]
    probability = model.predict_proba(features)[0][1]

    # Thresholds are applied to the unrounded probability.
    if probability > 0.7:
        confidence = "High"
    elif probability > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"

    return {
        "will_be_placed": bool(prediction),
        "placement_probability": round(float(probability), 4),
        "confidence": confidence,
    }


if __name__ == "__main__":
    # Demo run with one hand-written student record. The record is assembled
    # from three smaller literals purely for readability; merging them keeps
    # the keys in the order they are written here.
    profile = {
        "branch": "CSE",
        "college_tier": "Tier-1",
        "cgpa": 8.7,
        "backlogs": 0,
    }
    scores = {
        "coding_skills": 8.5,
        "dsa_score": 8.2,
        "aptitude_score": 85.0,
        "communication_skills": 7.8,
        "ml_knowledge": 7.0,
        "system_design": 6.5,
    }
    activity_counts = {
        "internships": 2,
        "projects_count": 4,
        "certifications": 3,
        "hackathons": 2,
        "open_source_contributions": 1,
        "extracurriculars": 2,
    }
    demo_student = {**profile, **scores, **activity_counts}

    # Print the prediction dict returned for the demo record.
    print(predict_placement(demo_student))