# File size: 11,848 bytes (revision 6fc156b)
import streamlit as st
import pandas as pd
import joblib
from enum import Enum
from pydantic import BaseModel, Field, confloat, constr, conlist, ValidationError
from typing import Optional
# Deserialize the trained model once, at import time. The path is relative,
# so it is resolved against the process's current working directory.
# NOTE(review): the file name suggests a LightGBM model - confirm.
model = joblib.load('lgb_model_main.joblib')

# Columns that make_prediction() casts to pandas 'category' dtype and passes
# to the model as its categorical features.
categorical_features = [
    'NAME_CONTRACT_TYPE',
    'CODE_GENDER',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'OCCUPATION_TYPE',
    'ORGANIZATION_TYPE',
]
class ContractType(str, Enum):
    """Allowed values for the ``NAME_CONTRACT_TYPE`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Cash_loans = "Cash loans"
    Revolving_loans = "Revolving loans"
class Gender(str, Enum):
    """Allowed values for the ``CODE_GENDER`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Male = "M"
    Female = "F"
    # NOTE(review): "XNA" presumably marks an unknown/unavailable value -
    # confirm against the training data.
    XNA = "XNA"
class IncomeType(str, Enum):
    """Allowed values for the ``NAME_INCOME_TYPE`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Working = "Working"
    Other = "Other"
    Commercial_associate = "Commercial associate"
    Pensioner = "Pensioner"
class EducationType(str, Enum):
    """Allowed values for the ``NAME_EDUCATION_TYPE`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Other = "Other"
    Higher_education = "Higher education"
    Secondary = "Secondary / secondary special"
class FamilyStatus(str, Enum):
    """Allowed values for the ``NAME_FAMILY_STATUS`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Civil_marriage = "Civil marriage"
    Married = "Married"
    Single = "Single / not married"
    Other = "Other"
class OccupationType(str, Enum):
    """Allowed values for the ``OCCUPATION_TYPE`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Laborers = "Laborers"
    Sales_staff = "Sales staff"
    Core_staff = "Core staff"
    Managers = "Managers"
    Drivers = "Drivers"
    Other = "Other"
class OrganizationType(str, Enum):
    """Allowed values for the ``ORGANIZATION_TYPE`` field.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    Business_Entity = "Business Entity Type 3"
    Other = "Other"
    # NOTE(review): "XNA" presumably marks an unknown/unavailable value -
    # confirm against the training data.
    XNA = "XNA"
    Self_employed = "Self-employed"
class PredictionInput(BaseModel):
    """Validated feature record for a single prediction request.

    The first group of fields is required. Every field in the second group
    is optional and defaults to ``None``.
    """

    # --- Required fields -------------------------------------------------
    # Monetary / ratio inputs: must be >= 0.
    AMT_INCOME_TOTAL: confloat(ge=0)
    AMT_CREDIT: confloat(ge=0)
    REGION_POPULATION_RELATIVE: confloat(ge=0)
    # Day offsets; the input form allows negative values for these.
    DAYS_REGISTRATION: int
    DAYS_BIRTH: int
    DAYS_ID_PUBLISH: int
    # Integer flags / ratings (the input form limits the flags to 0 or 1).
    FLAG_WORK_PHONE: int
    FLAG_PHONE: int
    REGION_RATING_CLIENT_W_CITY: int
    REG_CITY_NOT_WORK_CITY: int
    FLAG_DOCUMENT_3: int
    # Categorical inputs, restricted to the values of their enum.
    NAME_CONTRACT_TYPE: ContractType
    CODE_GENDER: Gender
    FLAG_OWN_CAR: int
    NAME_INCOME_TYPE: IncomeType
    NAME_EDUCATION_TYPE: EducationType
    NAME_FAMILY_STATUS: FamilyStatus
    OCCUPATION_TYPE: OccupationType
    ORGANIZATION_TYPE: OrganizationType

    # --- Optional fields -------------------------------------------------
    # NOTE(review): the name suffixes (_Bureau, _HCredit_installments,
    # _pos_cash, _HCredit_PApp) presumably identify the auxiliary table each
    # aggregate was derived from - confirm against the training pipeline.
    CREDIT_ACTIVE_Active_count_Bureau: Optional[int] = None
    CREDIT_ACTIVE_Closed_count_Bureau: Optional[int] = None
    DAYS_CREDIT_Bureau: Optional[int] = None
    # Amount fields are floats: the input form collects them with a "%f"
    # widget, and an int annotation would truncate or reject fractional
    # amounts depending on the pydantic version.
    AMT_INSTALMENT_mean_HCredit_installments: Optional[float] = None
    DAYS_INSTALMENT_mean_HCredit_installments: Optional[int] = None
    NUM_INSTALMENT_NUMBER_mean_HCredit_installments: Optional[int] = None
    NUM_INSTALMENT_VERSION_mean_HCredit_installments: Optional[int] = None
    NAME_CONTRACT_STATUS_Active_count_pos_cash: Optional[int] = None
    NAME_CONTRACT_STATUS_Completed_count_pos_cash: Optional[int] = None
    SK_DPD_DEF_pos_cash: Optional[int] = None
    NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp: Optional[int] = None
    NAME_GOODS_CATEGORY_Other_count_HCredit_PApp: Optional[int] = None
    NAME_PORTFOLIO_Cash_count_HCredit_PApp: Optional[int] = None
    NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp: Optional[int] = None
    NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp: Optional[int] = None
    NAME_YIELD_GROUP_high_count_HCredit_PApp: Optional[int] = None
    NAME_YIELD_GROUP_low_action_count_HCredit_PApp: Optional[int] = None
    AMT_CREDIT_HCredit_PApp: Optional[float] = None
    SELLERPLACE_AREA_HCredit_PApp: Optional[int] = None
def make_prediction(input_data: dict):
    """Score a single record with the module-level ``model``.

    Args:
        input_data: Mapping of feature name to value for one record. It must
            contain every column named in ``categorical_features``; values
            may be plain Python values or ``Enum`` members.

    Returns:
        A one-item dict holding the second column of ``predict_proba`` for
        the record, or ``{"error": message}`` if any step raises.
    """
    try:
        # Unwrap Enum members (e.g. from a pydantic model's .dict()) so the
        # frame holds the plain values rather than enum objects.
        record = {
            name: value.value if isinstance(value, Enum) else value
            for name, value in input_data.items()
        }
        input_df = pd.DataFrame([record])

        for feature in categorical_features:
            input_df[feature] = input_df[feature].astype('category')

        # A None in a single-row numeric column leaves it with object dtype;
        # cast such columns to float so the missing value becomes NaN.
        for column in input_df.columns:
            if column in categorical_features:
                continue
            if input_df[column].dtype == object:
                input_df[column] = input_df[column].astype(float)

        # Column 1 of predict_proba is the probability of the second class.
        predictions = model.predict_proba(input_df, categorical_feature=categorical_features)[:, 1]
        return {"Probability for this credit to be defaulted is: ": predictions[0]}
    except Exception as e:
        # Deliberate catch-all: callers receive the failure as data.
        return {"error": str(e)}
def main():
    """Render the input form and display the prediction for a submission.

    Each widget's label is also the feature name it feeds. On submit, the
    collected values are validated with ``PredictionInput`` and passed to
    ``make_prediction``; validation failures and prediction errors are shown
    with ``st.error``, a successful result with ``st.write``.
    """
    st.title("Credit Default Prediction")
    st.header("Input Data")

    # Values are collected in widget order, keyed by the widget label.
    input_data = {}

    def number(name, **options):
        input_data[name] = st.number_input(name, **options)

    def choice(name, enum_cls):
        input_data[name] = st.selectbox(name, list(enum_cls))

    # st.number_input rejects a mix of int and float among min_value,
    # max_value and value, so each preset sticks to a single numeric type.
    amount = {"min_value": 0.0, "value": 0.0, "format": "%f"}
    days = {"min_value": -100000, "max_value": 100000, "format": "%d"}
    flag = {"min_value": 0, "max_value": 1, "format": "%d"}
    count = {"min_value": 0, "value": 0, "format": "%d"}

    with st.form(key='input_form'):
        for name in ("AMT_INCOME_TOTAL", "AMT_CREDIT", "REGION_POPULATION_RELATIVE"):
            number(name, **amount)
        # No explicit value: these start at the widget's minimum.
        for name in ("DAYS_REGISTRATION", "DAYS_BIRTH", "DAYS_ID_PUBLISH"):
            number(name, **days)
        number("FLAG_WORK_PHONE", **flag)
        number("FLAG_PHONE", **flag)
        number("REGION_RATING_CLIENT_W_CITY", min_value=0, max_value=10, format="%d")
        number("REG_CITY_NOT_WORK_CITY", **flag)
        number("FLAG_DOCUMENT_3", **flag)
        choice("NAME_CONTRACT_TYPE", ContractType)
        choice("CODE_GENDER", Gender)
        number("FLAG_OWN_CAR", **flag)
        choice("NAME_INCOME_TYPE", IncomeType)
        choice("NAME_EDUCATION_TYPE", EducationType)
        choice("NAME_FAMILY_STATUS", FamilyStatus)
        choice("OCCUPATION_TYPE", OccupationType)
        choice("ORGANIZATION_TYPE", OrganizationType)

        number("CREDIT_ACTIVE_Active_count_Bureau", **count)
        number("CREDIT_ACTIVE_Closed_count_Bureau", **count)
        number("DAYS_CREDIT_Bureau", value=0, **days)
        number("AMT_INSTALMENT_mean_HCredit_installments", **amount)
        number("DAYS_INSTALMENT_mean_HCredit_installments", value=0, **days)
        for name in (
            "NUM_INSTALMENT_NUMBER_mean_HCredit_installments",
            "NUM_INSTALMENT_VERSION_mean_HCredit_installments",
            "NAME_CONTRACT_STATUS_Active_count_pos_cash",
            "NAME_CONTRACT_STATUS_Completed_count_pos_cash",
            "SK_DPD_DEF_pos_cash",
            "NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp",
            "NAME_GOODS_CATEGORY_Other_count_HCredit_PApp",
            "NAME_PORTFOLIO_Cash_count_HCredit_PApp",
            "NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp",
            "NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp",
            "NAME_YIELD_GROUP_high_count_HCredit_PApp",
            "NAME_YIELD_GROUP_low_action_count_HCredit_PApp",
        ):
            number(name, **count)
        number("AMT_CREDIT_HCredit_PApp", **amount)
        number("SELLERPLACE_AREA_HCredit_PApp", **count)

        submit_button = st.form_submit_button(label='Predict')

    if not submit_button:
        return

    try:
        input_data_validated = PredictionInput(**input_data)
    except ValidationError as e:
        st.error(f"Validation error: {e}")
        return

    prediction = make_prediction(input_data_validated.dict())
    # make_prediction reports failures as {"error": message} instead of
    # raising, so show those as errors rather than as a result.
    if "error" in prediction:
        st.error(prediction["error"])
    else:
        st.write(prediction)
# Run the app only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()