-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathloan.py
More file actions
98 lines (82 loc) · 3.3 KB
/
loan.py
File metadata and controls
98 lines (82 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.tree import DecisionTreeClassifier
# Step 1: Load the data
# Expected: one row per borrower in 'loan_data.csv', with a binary
# 'default' column (1 = defaulted, 0 = repaid).
data = pd.read_csv('loan_data.csv')

# Step 2: Preprocess the data
# Separate features (X) and target (y)
X = data.drop(columns=['default'])  # Features
y = data['default']  # Target (1 for default, 0 for no default)

# Identify categorical and numerical columns by dtype.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# Preprocessing pipeline: scale numeric columns, one-hot encode
# categoricals. handle_unknown='ignore' keeps later scoring from
# failing on category values never seen during training.
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Step 3: Train and evaluate models
# stratify=y preserves the (typically imbalanced) default rate in both
# splits, so precision/recall comparisons between models are meaningful.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Candidate models. max_iter is raised for LogisticRegression so it
# converges on wide one-hot-encoded matrices; random_state makes the
# tree-based results (and hence the best-model choice) reproducible.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

# Evaluate each model inside a Pipeline so the preprocessor is fitted
# on the training fold only (no train/test leakage).
results = {}
for name, model in models.items():
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    # Pipeline.fit fits the shared `preprocessor` and this `model`
    # in place, so both objects remain fitted after the loop — the
    # expected_loss() helper below relies on that.
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    y_pred_proba = pipeline.predict_proba(X_test)[:, 1]  # Probability of default
    # Store results; zero_division=0 silences the undefined-metric
    # warning when a model predicts no positives at all.
    results[name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, zero_division=0),
        'Recall': recall_score(y_test, y_pred, zero_division=0),
        'F1-Score': f1_score(y_test, y_pred),
        'ROC-AUC': roc_auc_score(y_test, y_pred_proba)
    }

# Display results
results_df = pd.DataFrame(results).T
print("Model Performance Comparison:")
print(results_df)

# Step 4: Select the best model by ROC-AUC (threshold-independent,
# appropriate for ranking default probabilities).
best_model_name = results_df['ROC-AUC'].idxmax()
# models[...] is already fitted because Pipeline fit its steps in place.
best_model = models[best_model_name]
print(f"\nBest Model: {best_model_name}")
# Step 5: Create a function to calculate Expected Loss
def expected_loss(borrower_data, loan_amount, recovery_rate=0.1):
    """Estimate the expected loss on a loan for a single borrower.

    Expected Loss = PD * EAD * LGD, where PD is the model-predicted
    probability of default, EAD (exposure at default) is ``loan_amount``,
    and LGD (loss given default) is ``1 - recovery_rate``.

    Parameters
    ----------
    borrower_data : dict
        Feature values for one borrower. Keys must match the feature
        columns the preprocessor/model were trained on.
    loan_amount : float
        Exposure at default, in currency units. Must be non-negative.
    recovery_rate : float, optional
        Fraction of the exposure recovered if the borrower defaults
        (default 0.1). Must lie in [0, 1].

    Returns
    -------
    float
        The expected loss in currency units.

    Raises
    ------
    ValueError
        If ``loan_amount`` is negative or ``recovery_rate`` is outside
        the [0, 1] interval.
    """
    # Guard against nonsensical inputs that would silently produce a
    # negative or inflated "loss".
    if loan_amount < 0:
        raise ValueError("loan_amount must be non-negative")
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError("recovery_rate must be in [0, 1]")
    # Wrap the single borrower in a one-row DataFrame so the fitted
    # ColumnTransformer (module-level `preprocessor`) can be applied.
    borrower_df = pd.DataFrame([borrower_data])
    borrower_processed = preprocessor.transform(borrower_df)
    # Probability of default from the best (already fitted) model.
    pd_value = best_model.predict_proba(borrower_processed)[0, 1]
    # Expected loss: PD x EAD x LGD.
    el = pd_value * loan_amount * (1 - recovery_rate)
    return el
# Example usage
# NOTE(review): the keys below must exactly match the feature columns of
# 'loan_data.csv' used at training time (all columns except 'default') —
# these names look like placeholders; verify against the actual dataset.
borrower_data = {
    'income': 50000,
    'total_loans_outstanding': 20000,
    'other_metric_1': 10,
    'other_metric_2': 5,
    'categorical_feature': 'category_A'
}
# Exposure at default for this example loan, in currency units.
loan_amount = 10000
# Uses the default recovery_rate of 0.1 (i.e. LGD = 0.9).
el = expected_loss(borrower_data, loan_amount)
print(f"\nExpected Loss for the Loan: ${el:.2f}")