"""Streamlit front end for a credit-default classifier.

The page collects applicant features in a form, validates them with a
pydantic schema, and displays the default probability returned by a
pre-trained model loaded from ``lgb_model_main.joblib``.
"""

from enum import Enum
from typing import Optional

import joblib
import pandas as pd
import streamlit as st
from pydantic import BaseModel, Field, confloat, constr, conlist, ValidationError

# Load the model.
# NOTE(review): Streamlit re-executes the script on every interaction, so the
# model is re-read from disk each time; consider caching the load if the
# installed Streamlit version offers a resource cache.
model = joblib.load('lgb_model_main.joblib')

# Columns converted to pandas 'category' dtype before prediction.
categorical_features = [
    'NAME_CONTRACT_TYPE',
    'CODE_GENDER',
    'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE',
    'NAME_FAMILY_STATUS',
    'OCCUPATION_TYPE',
    'ORGANIZATION_TYPE',
]


class ContractType(str, Enum):
    """Allowed values for ``NAME_CONTRACT_TYPE``."""

    Cash_loans = "Cash loans"
    Revolving_loans = "Revolving loans"


class Gender(str, Enum):
    """Allowed values for ``CODE_GENDER``."""

    Male = "M"
    Female = "F"
    XNA = "XNA"


class IncomeType(str, Enum):
    """Allowed values for ``NAME_INCOME_TYPE``."""

    Working = "Working"
    Other = "Other"
    Commercial_associate = "Commercial associate"
    Pensioner = "Pensioner"


class EducationType(str, Enum):
    """Allowed values for ``NAME_EDUCATION_TYPE``."""

    Other = "Other"
    Higher_education = "Higher education"
    Secondary = "Secondary / secondary special"


class FamilyStatus(str, Enum):
    """Allowed values for ``NAME_FAMILY_STATUS``."""

    Civil_marriage = "Civil marriage"
    Married = "Married"
    Single = "Single / not married"
    Other = "Other"


class OccupationType(str, Enum):
    """Allowed values for ``OCCUPATION_TYPE``."""

    Laborers = "Laborers"
    Sales_staff = "Sales staff"
    Core_staff = "Core staff"
    Managers = "Managers"
    Drivers = "Drivers"
    Other = "Other"


class OrganizationType(str, Enum):
    """Allowed values for ``ORGANIZATION_TYPE``."""

    Business_Entity = "Business Entity Type 3"
    Other = "Other"
    XNA = "XNA"
    Self_employed = "Self-employed"


class PredictionInput(BaseModel):
    """Validated feature set for one prediction request.

    The declaration order matters: ``.dict()`` emits fields in this order and
    ``make_prediction`` turns that mapping into the DataFrame columns.
    """

    AMT_INCOME_TOTAL: confloat(ge=0)
    AMT_CREDIT: confloat(ge=0)
    REGION_POPULATION_RELATIVE: confloat(ge=0)
    DAYS_REGISTRATION: int
    DAYS_BIRTH: int
    DAYS_ID_PUBLISH: int
    FLAG_WORK_PHONE: int
    FLAG_PHONE: int
    REGION_RATING_CLIENT_W_CITY: int
    REG_CITY_NOT_WORK_CITY: int
    FLAG_DOCUMENT_3: int
    NAME_CONTRACT_TYPE: ContractType
    CODE_GENDER: Gender
    FLAG_OWN_CAR: int
    NAME_INCOME_TYPE: IncomeType
    NAME_EDUCATION_TYPE: EducationType
    NAME_FAMILY_STATUS: FamilyStatus
    OCCUPATION_TYPE: OccupationType
    ORGANIZATION_TYPE: OrganizationType
    CREDIT_ACTIVE_Active_count_Bureau: Optional[int] = None
    CREDIT_ACTIVE_Closed_count_Bureau: Optional[int] = None
    DAYS_CREDIT_Bureau: Optional[int] = None
    # Float, not int: the form collects this amount with a "%f" input.
    AMT_INSTALMENT_mean_HCredit_installments: Optional[float] = None
    DAYS_INSTALMENT_mean_HCredit_installments: Optional[int] = None
    NUM_INSTALMENT_NUMBER_mean_HCredit_installments: Optional[int] = None
    NUM_INSTALMENT_VERSION_mean_HCredit_installments: Optional[int] = None
    NAME_CONTRACT_STATUS_Active_count_pos_cash: Optional[int] = None
    NAME_CONTRACT_STATUS_Completed_count_pos_cash: Optional[int] = None
    SK_DPD_DEF_pos_cash: Optional[int] = None
    NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp: Optional[int] = None
    NAME_GOODS_CATEGORY_Other_count_HCredit_PApp: Optional[int] = None
    NAME_PORTFOLIO_Cash_count_HCredit_PApp: Optional[int] = None
    NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp: Optional[int] = None
    NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp: Optional[int] = None
    NAME_YIELD_GROUP_high_count_HCredit_PApp: Optional[int] = None
    NAME_YIELD_GROUP_low_action_count_HCredit_PApp: Optional[int] = None
    # Float, not int: the form collects this amount with a "%f" input.
    AMT_CREDIT_HCredit_PApp: Optional[float] = None
    SELLERPLACE_AREA_HCredit_PApp: Optional[int] = None


def make_prediction(input_data: dict):
    """Score a single feature mapping with the loaded model.

    Args:
        input_data: Mapping of feature name to value, typically the output of
            ``PredictionInput(...).dict()``. Enum members are accepted and
            replaced by their underlying values.

    Returns:
        A dict with the single key
        ``"Probability for this credit to be defaulted is: "`` mapped to the
        second column of ``predict_proba``, or ``{"error": <message>}`` if
        anything fails while preparing the data or predicting.
    """
    try:
        # Unwrap Enum members so the frame holds plain strings rather than
        # Enum objects before the categorical conversion.
        row = {
            key: value.value if isinstance(value, Enum) else value
            for key, value in input_data.items()
        }
        input_df = pd.DataFrame([row])

        # Convert categorical features to 'category' type
        for feature in categorical_features:
            input_df[feature] = input_df[feature].astype('category')

        # Optional fields left as None would yield object-dtype columns;
        # coerce every remaining column to a numeric dtype (None -> NaN).
        for column in input_df.columns:
            if column not in categorical_features:
                input_df[column] = pd.to_numeric(input_df[column])

        # NOTE(review): `categorical_feature` is passed through to the model's
        # predict call; confirm the installed model library accepts it there.
        predictions = model.predict_proba(
            input_df, categorical_feature=categorical_features
        )[:, 1]

        # Column 1 of predict_proba is reported as the default probability.
        return {"Probability for this credit to be defaulted is: ": predictions[0]}
    except Exception as e:
        # UI boundary: report the failure to the caller instead of crashing.
        return {"error": str(e)}


def _amount_input(label, **options):
    """Render a non-negative float input; all numeric arguments are floats."""
    return st.number_input(label, min_value=0.0, format="%f", **options)


def _days_input(label, **options):
    """Render an integer day-offset input bounded to +/-100000."""
    return st.number_input(
        label, min_value=-100000, max_value=100000, format="%d", **options
    )


def _flag_input(label):
    """Render a 0/1 integer input."""
    return st.number_input(label, min_value=0, max_value=1, format="%d")


def _rating_input(label):
    """Render an integer input bounded to 0..10."""
    return st.number_input(label, min_value=0, max_value=10, format="%d")


def _count_input(label):
    """Render a non-negative integer input that starts at 0."""
    return st.number_input(label, min_value=0, format="%d", value=0)


def _enum_input(label, enum_cls):
    """Render a select box over an Enum, labelling options by their value."""
    return st.selectbox(
        label, list(enum_cls), format_func=lambda option: option.value
    )


# (field name, widget renderer, extra renderer arguments), in on-screen order.
_FORM_FIELDS = (
    ("AMT_INCOME_TOTAL", _amount_input, {}),
    ("AMT_CREDIT", _amount_input, {}),
    ("REGION_POPULATION_RELATIVE", _amount_input, {}),
    ("DAYS_REGISTRATION", _days_input, {}),
    ("DAYS_BIRTH", _days_input, {}),
    ("DAYS_ID_PUBLISH", _days_input, {}),
    ("FLAG_WORK_PHONE", _flag_input, {}),
    ("FLAG_PHONE", _flag_input, {}),
    ("REGION_RATING_CLIENT_W_CITY", _rating_input, {}),
    ("REG_CITY_NOT_WORK_CITY", _flag_input, {}),
    ("FLAG_DOCUMENT_3", _flag_input, {}),
    ("NAME_CONTRACT_TYPE", _enum_input, {"enum_cls": ContractType}),
    ("CODE_GENDER", _enum_input, {"enum_cls": Gender}),
    ("FLAG_OWN_CAR", _flag_input, {}),
    ("NAME_INCOME_TYPE", _enum_input, {"enum_cls": IncomeType}),
    ("NAME_EDUCATION_TYPE", _enum_input, {"enum_cls": EducationType}),
    ("NAME_FAMILY_STATUS", _enum_input, {"enum_cls": FamilyStatus}),
    ("OCCUPATION_TYPE", _enum_input, {"enum_cls": OccupationType}),
    ("ORGANIZATION_TYPE", _enum_input, {"enum_cls": OrganizationType}),
    ("CREDIT_ACTIVE_Active_count_Bureau", _count_input, {}),
    ("CREDIT_ACTIVE_Closed_count_Bureau", _count_input, {}),
    ("DAYS_CREDIT_Bureau", _days_input, {"value": 0}),
    ("AMT_INSTALMENT_mean_HCredit_installments", _amount_input, {"value": 0.0}),
    ("DAYS_INSTALMENT_mean_HCredit_installments", _days_input, {"value": 0}),
    ("NUM_INSTALMENT_NUMBER_mean_HCredit_installments", _count_input, {}),
    ("NUM_INSTALMENT_VERSION_mean_HCredit_installments", _count_input, {}),
    ("NAME_CONTRACT_STATUS_Active_count_pos_cash", _count_input, {}),
    ("NAME_CONTRACT_STATUS_Completed_count_pos_cash", _count_input, {}),
    ("SK_DPD_DEF_pos_cash", _count_input, {}),
    ("NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp", _count_input, {}),
    ("NAME_GOODS_CATEGORY_Other_count_HCredit_PApp", _count_input, {}),
    ("NAME_PORTFOLIO_Cash_count_HCredit_PApp", _count_input, {}),
    ("NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp", _count_input, {}),
    ("NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp", _count_input, {}),
    ("NAME_YIELD_GROUP_high_count_HCredit_PApp", _count_input, {}),
    ("NAME_YIELD_GROUP_low_action_count_HCredit_PApp", _count_input, {}),
    ("AMT_CREDIT_HCredit_PApp", _amount_input, {"value": 0.0}),
    ("SELLERPLACE_AREA_HCredit_PApp", _count_input, {}),
)


def main():
    """Render the input form and show the prediction once it is submitted."""
    st.title("Credit Default Prediction")
    st.header("Input Data")

    with st.form(key='input_form'):
        # Widgets are created in table order; each label is the field name.
        input_data = {
            name: render(name, **options)
            for name, render, options in _FORM_FIELDS
        }
        submit_button = st.form_submit_button(label='Predict')

    if not submit_button:
        return

    try:
        input_data_validated = PredictionInput(**input_data)
    except ValidationError as e:
        st.error(f"Validation error: {e}")
        return

    prediction = make_prediction(input_data_validated.dict())
    if "error" in prediction:
        # Show failures as errors rather than as if they were results.
        st.error(f"Prediction error: {prediction['error']}")
    else:
        st.write(prediction)


if __name__ == "__main__":
    main()