Spaces:

ArturG9
/

Stroke_Prediction_App_Streamlit

Sleeping

App Files Files Community

ArturG9 committed on Jul 2

Commit

c52a337

•

1 Parent(s): 5b5bce3

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -215

app.py CHANGED Viewed

@@ -1,217 +1,110 @@
 import streamlit as st
-import pandas as pd
 import joblib
-from enum import Enum
-from pydantic import BaseModel, Field, confloat, constr, conlist, ValidationError
-from typing import Optional
-# Load the model
-model = joblib.load('lgb_model_main.joblib')
-categorical_features = [
-    'NAME_CONTRACT_TYPE',
-    'CODE_GENDER',
-    'NAME_INCOME_TYPE',
-    'NAME_EDUCATION_TYPE',
-    'NAME_FAMILY_STATUS',
-    'OCCUPATION_TYPE',
-    'ORGANIZATION_TYPE',
-]
-class ContractType(str, Enum):
-    Cash_loans = "Cash loans"
-    Revolving_loans = "Revolving loans"
-class Gender(str, Enum):
-    Male = "M"
-    Female = "F"
-    XNA = "XNA"
-class IncomeType(str, Enum):
-    Working = "Working"
-    Other = "Other"
-    Commercial_associate = "Commercial associate"
-    Pensioner = "Pensioner"
-class EducationType(str, Enum):
-    Other = "Other"
-    Higher_education = "Higher education"
-    Secondary = "Secondary / secondary special"
-class FamilyStatus(str, Enum):
-    Civil_marriage = "Civil marriage"
-    Married = "Married"
-    Single = "Single / not married"
-    Other = "Other"
-class OccupationType(str, Enum):
-    Laborers = "Laborers"
-    Sales_staff = "Sales staff"
-    Core_staff = "Core staff"
-    Managers = "Managers"
-    Drivers = "Drivers"
-    Other = "Other"
-class OrganizationType(str, Enum):
-    Business_Entity = "Business Entity Type 3"
-    Other = "Other"
-    XNA = "XNA"
-    Self_employed = "Self-employed"
-class PredictionInput(BaseModel):
-    AMT_INCOME_TOTAL: confloat(ge=0)
-    AMT_CREDIT: confloat(ge=0)
-    REGION_POPULATION_RELATIVE: confloat(ge=0)
-    DAYS_REGISTRATION: int
-    DAYS_BIRTH: int
-    DAYS_ID_PUBLISH: int
-    FLAG_WORK_PHONE: int
-    FLAG_PHONE: int
-    REGION_RATING_CLIENT_W_CITY: int
-    REG_CITY_NOT_WORK_CITY: int
-    FLAG_DOCUMENT_3: int
-    NAME_CONTRACT_TYPE: ContractType
-    CODE_GENDER: Gender
-    FLAG_OWN_CAR: int
-    NAME_INCOME_TYPE: IncomeType
-    NAME_EDUCATION_TYPE: EducationType
-    NAME_FAMILY_STATUS: FamilyStatus
-    OCCUPATION_TYPE: OccupationType
-    ORGANIZATION_TYPE: OrganizationType
-    CREDIT_ACTIVE_Active_count_Bureau: Optional[int] = None
-    CREDIT_ACTIVE_Closed_count_Bureau: Optional[int] = None
-    DAYS_CREDIT_Bureau: Optional[int] = None
-    AMT_INSTALMENT_mean_HCredit_installments: Optional[int] = None
-    DAYS_INSTALMENT_mean_HCredit_installments: Optional[int] = None
-    NUM_INSTALMENT_NUMBER_mean_HCredit_installments: Optional[int] = None
-    NUM_INSTALMENT_VERSION_mean_HCredit_installments: Optional[int] = None
-    NAME_CONTRACT_STATUS_Active_count_pos_cash: Optional[int] = None
-    NAME_CONTRACT_STATUS_Completed_count_pos_cash: Optional[int] = None
-    SK_DPD_DEF_pos_cash: Optional[int] = None
-    NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp: Optional[int] = None
-    NAME_GOODS_CATEGORY_Other_count_HCredit_PApp: Optional[int] = None
-    NAME_PORTFOLIO_Cash_count_HCredit_PApp: Optional[int] = None
-    NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp: Optional[int] = None
-    NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp: Optional[int] = None
-    NAME_YIELD_GROUP_high_count_HCredit_PApp: Optional[int] = None
-    NAME_YIELD_GROUP_low_action_count_HCredit_PApp: Optional[int] = None
-    AMT_CREDIT_HCredit_PApp: Optional[int] = None
-    SELLERPLACE_AREA_HCredit_PApp: Optional[int] = None
-def make_prediction(input_data: dict):
-    try:
-        # Convert dictionary to a pandas DataFrame
-        input_df = pd.DataFrame([input_data])
-        # Convert categorical features to 'category' type
-        for feature in categorical_features:
-            input_df[feature] = input_df[feature].astype('category')
-        # Make predictions using the loaded model
-        predictions = model.predict_proba(input_df, categorical_feature=categorical_features)[:, 1]
-        # Placeholder response for demonstration
-        response = {"Probability for this credit to be defaulted is: ": predictions[0]}  # Extract the probability for class 1
-        return response
-    except Exception as e:
-        return {"error": str(e)}
-def main():
-    st.title("Credit Default Prediction")
-    st.header("Input Data")
-    with st.form(key='input_form'):
-        AMT_INCOME_TOTAL = st.number_input("AMT_INCOME_TOTAL", min_value=0.0, format="%f")
-        AMT_CREDIT = st.number_input("AMT_CREDIT", min_value=0.0, format="%f")
-        REGION_POPULATION_RELATIVE = st.number_input("REGION_POPULATION_RELATIVE", min_value=0.0, format="%f")
-        DAYS_REGISTRATION = st.number_input("DAYS_REGISTRATION", min_value=-100000, max_value=100000, format="%d")
-        DAYS_BIRTH = st.number_input("DAYS_BIRTH", min_value=-100000, max_value=100000, format="%d")
-        DAYS_ID_PUBLISH = st.number_input("DAYS_ID_PUBLISH", min_value=-100000, max_value=100000, format="%d")
-        FLAG_WORK_PHONE = st.number_input("FLAG_WORK_PHONE", min_value=0, max_value=1, format="%d")
-        FLAG_PHONE = st.number_input("FLAG_PHONE", min_value=0, max_value=1, format="%d")
-        REGION_RATING_CLIENT_W_CITY = st.number_input("REGION_RATING_CLIENT_W_CITY", min_value=0, max_value=10, format="%d")
-        REG_CITY_NOT_WORK_CITY = st.number_input("REG_CITY_NOT_WORK_CITY", min_value=0, max_value=1, format="%d")
-        FLAG_DOCUMENT_3 = st.number_input("FLAG_DOCUMENT_3", min_value=0, max_value=1, format="%d")
-        NAME_CONTRACT_TYPE = st.selectbox("NAME_CONTRACT_TYPE", list(ContractType))
-        CODE_GENDER = st.selectbox("CODE_GENDER", list(Gender))
-        FLAG_OWN_CAR = st.number_input("FLAG_OWN_CAR", min_value=0, max_value=1, format="%d")
-        NAME_INCOME_TYPE = st.selectbox("NAME_INCOME_TYPE", list(IncomeType))
-        NAME_EDUCATION_TYPE = st.selectbox("NAME_EDUCATION_TYPE", list(EducationType))
-        NAME_FAMILY_STATUS = st.selectbox("NAME_FAMILY_STATUS", list(FamilyStatus))
-        OCCUPATION_TYPE = st.selectbox("OCCUPATION_TYPE", list(OccupationType))
-        ORGANIZATION_TYPE = st.selectbox("ORGANIZATION_TYPE", list(OrganizationType))
-        CREDIT_ACTIVE_Active_count_Bureau = st.number_input("CREDIT_ACTIVE_Active_count_Bureau", min_value=0, format="%d", value=0)
-        CREDIT_ACTIVE_Closed_count_Bureau = st.number_input("CREDIT_ACTIVE_Closed_count_Bureau", min_value=0, format="%d", value=0)
-        DAYS_CREDIT_Bureau = st.number_input("DAYS_CREDIT_Bureau", min_value=-100000, max_value=100000, format="%d", value=0)
-        AMT_INSTALMENT_mean_HCredit_installments = st.number_input("AMT_INSTALMENT_mean_HCredit_installments", min_value=0, format="%f", value=0.0)
-        DAYS_INSTALMENT_mean_HCredit_installments = st.number_input("DAYS_INSTALMENT_mean_HCredit_installments", min_value=-100000, max_value=100000, format="%d", value=0)
-        NUM_INSTALMENT_NUMBER_mean_HCredit_installments = st.number_input("NUM_INSTALMENT_NUMBER_mean_HCredit_installments", min_value=0, format="%d", value=0)
-        NUM_INSTALMENT_VERSION_mean_HCredit_installments = st.number_input("NUM_INSTALMENT_VERSION_mean_HCredit_installments", min_value=0, format="%d", value=0)
-        NAME_CONTRACT_STATUS_Active_count_pos_cash = st.number_input("NAME_CONTRACT_STATUS_Active_count_pos_cash", min_value=0, format="%d", value=0)
-        NAME_CONTRACT_STATUS_Completed_count_pos_cash = st.number_input("NAME_CONTRACT_STATUS_Completed_count_pos_cash", min_value=0, format="%d", value=0)
-        SK_DPD_DEF_pos_cash = st.number_input("SK_DPD_DEF_pos_cash", min_value=0, format="%d", value=0)
-        NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp = st.number_input("NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_GOODS_CATEGORY_Other_count_HCredit_PApp = st.number_input("NAME_GOODS_CATEGORY_Other_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_PORTFOLIO_Cash_count_HCredit_PApp = st.number_input("NAME_PORTFOLIO_Cash_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp = st.number_input("NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp = st.number_input("NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_YIELD_GROUP_high_count_HCredit_PApp = st.number_input("NAME_YIELD_GROUP_high_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        NAME_YIELD_GROUP_low_action_count_HCredit_PApp = st.number_input("NAME_YIELD_GROUP_low_action_count_HCredit_PApp", min_value=0, format="%d", value=0)
-        AMT_CREDIT_HCredit_PApp = st.number_input("AMT_CREDIT_HCredit_PApp", min_value=0, format="%f", value=0.0)
-        SELLERPLACE_AREA_HCredit_PApp = st.number_input("SELLERPLACE_AREA_HCredit_PApp", min_value=0, format="%d", value=0)
-        submit_button = st.form_submit_button(label='Predict')
-    if submit_button:
-        input_data = {
-            "AMT_INCOME_TOTAL": AMT_INCOME_TOTAL,
-            "AMT_CREDIT": AMT_CREDIT,
-            "REGION_POPULATION_RELATIVE": REGION_POPULATION_RELATIVE,
-            "DAYS_REGISTRATION": DAYS_REGISTRATION,
-            "DAYS_BIRTH": DAYS_BIRTH,
-            "DAYS_ID_PUBLISH": DAYS_ID_PUBLISH,
-            "FLAG_WORK_PHONE": FLAG_WORK_PHONE,
-            "FLAG_PHONE": FLAG_PHONE,
-            "REGION_RATING_CLIENT_W_CITY": REGION_RATING_CLIENT_W_CITY,
-            "REG_CITY_NOT_WORK_CITY": REG_CITY_NOT_WORK_CITY,
-            "FLAG_DOCUMENT_3": FLAG_DOCUMENT_3,
-            "NAME_CONTRACT_TYPE": NAME_CONTRACT_TYPE,
-            "CODE_GENDER": CODE_GENDER,
-            "FLAG_OWN_CAR": FLAG_OWN_CAR,
-            "NAME_INCOME_TYPE": NAME_INCOME_TYPE,
-            "NAME_EDUCATION_TYPE": NAME_EDUCATION_TYPE,
-            "NAME_FAMILY_STATUS": NAME_FAMILY_STATUS,
-            "OCCUPATION_TYPE": OCCUPATION_TYPE,
-            "ORGANIZATION_TYPE": ORGANIZATION_TYPE,
-            "CREDIT_ACTIVE_Active_count_Bureau": CREDIT_ACTIVE_Active_count_Bureau,
-            "CREDIT_ACTIVE_Closed_count_Bureau": CREDIT_ACTIVE_Closed_count_Bureau,
-            "DAYS_CREDIT_Bureau": DAYS_CREDIT_Bureau,
-            "AMT_INSTALMENT_mean_HCredit_installments": AMT_INSTALMENT_mean_HCredit_installments,
-            "DAYS_INSTALMENT_mean_HCredit_installments": DAYS_INSTALMENT_mean_HCredit_installments,
-            "NUM_INSTALMENT_NUMBER_mean_HCredit_installments": NUM_INSTALMENT_NUMBER_mean_HCredit_installments,
-            "NUM_INSTALMENT_VERSION_mean_HCredit_installments": NUM_INSTALMENT_VERSION_mean_HCredit_installments,
-            "NAME_CONTRACT_STATUS_Active_count_pos_cash": NAME_CONTRACT_STATUS_Active_count_pos_cash,
-            "NAME_CONTRACT_STATUS_Completed_count_pos_cash": NAME_CONTRACT_STATUS_Completed_count_pos_cash,
-            "SK_DPD_DEF_pos_cash": SK_DPD_DEF_pos_cash,
-            "NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp": NAME_CONTRACT_STATUS_Refused_count_HCredit_PApp,
-            "NAME_GOODS_CATEGORY_Other_count_HCredit_PApp": NAME_GOODS_CATEGORY_Other_count_HCredit_PApp,
-            "NAME_PORTFOLIO_Cash_count_HCredit_PApp": NAME_PORTFOLIO_Cash_count_HCredit_PApp,
-            "NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp": NAME_PRODUCT_TYPE_walk_in_count_HCredit_PApp,
-            "NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp": NAME_SELLER_INDUSTRY_Other_count_HCredit_PApp,
-            "NAME_YIELD_GROUP_high_count_HCredit_PApp": NAME_YIELD_GROUP_high_count_HCredit_PApp,
-            "NAME_YIELD_GROUP_low_action_count_HCredit_PApp": NAME_YIELD_GROUP_low_action_count_HCredit_PApp,
-            "AMT_CREDIT_HCredit_PApp": AMT_CREDIT_HCredit_PApp,
-            "SELLERPLACE_AREA_HCredit_PApp": SELLERPLACE_AREA_HCredit_PApp
-        }
-        try:
-            input_data_validated = PredictionInput(**input_data)
-            prediction = make_prediction(input_data_validated.dict())
-            st.write(prediction)
-        except ValidationError as e:
-            st.error(f"Validation error: {e}")
-if __name__ == "__main__":
-    main()

+import io
+import pickle
 import streamlit as st
 import joblib
+import shap
+import pandas as pd
+import matplotlib.pyplot as plt
+# Deserialize the LightGBM model from its pickle file, then the encoder and
+# the list of categorical column names from their joblib files.
+with open('lgb1_model.pkl', 'rb') as model_file:
+    lgb1 = pickle.load(model_file)
+encoder = joblib.load("encoder.joblib")
+categorical_features = joblib.load("categorical_features.joblib")
+# The sidebar selector picks which dashboard is rendered; the selected
+# name is also used as the page title.
+option = st.sidebar.selectbox(
+    "Which dashboard?",
+    ("Model information", "Stroke prediction"),
+)
+st.title(option)
+def get_pred():
+    """
+    Render the stroke probability calculator and its SHAP explanations.
+
+    Collects the input values through Streamlit widgets. Once the "Predict"
+    button is pressed, the categorical answers are transformed with the
+    module-level ``encoder``, the resulting single-row frame is scored with
+    the module-level ``lgb1`` model, and the class-1 probability is shown,
+    followed by a SHAP force plot, a SHAP summary plot and a SHAP
+    interaction summary plot.
+
+    Relies on the module-level ``lgb1``, ``encoder`` and
+    ``categorical_features`` objects. Returns ``None``; all output is
+    written to the Streamlit page.
+    """
+    st.header("Stroke probability calculator ")
+    # User input for prediction (widget order defines the page layout)
+    gender = st.selectbox("Select gender: ", ["Male", "Female", 'Other'])
+    work_type = st.selectbox("Work type: ", ["Private", "Self_employed", 'children', 'Govt_job', 'Never_worked'])
+    residence_status = st.selectbox("Residence status: ", ["Urban", "Rural"])
+    smoking_status = st.selectbox("Smoking status: ", ["Unknown", "formerly smoked", 'never smoked', 'smokes'])
+    age = st.slider("Input age: ", 0, 120)
+    hypertension = st.select_slider("Do you have hypertension: ", [0, 1])
+    heart_disease = st.select_slider("Do you have heart disease: ", [0, 1])
+    ever_married = st.select_slider("Have you ever married? ", [0, 1])
+    avg_glucosis_lvl = st.slider("Average glucosis level: ", 50, 280)
+    bmi = st.slider("Input Bmi: ", 10, 100)
+    # User input data, keyed by the column names used for the model frame
+    data = {
+        "gender": gender,
+        "work_type": work_type,
+        "Residence_type": residence_status,
+        "smoking_status": smoking_status,
+        "age": age,
+        "hypertension": hypertension,
+        "heart_disease": heart_disease,
+        "ever_married": ever_married,
+        "avg_glucose_level": avg_glucosis_lvl,
+        "bmi": bmi,
+    }
+    # Nothing below runs until the user presses the button.
+    if not st.button("Predict"):
+        return
+    # Convert input data to a single-row DataFrame
+    X = pd.DataFrame([data])
+    # Encode categorical features and label the new columns with the
+    # names reported by the encoder
+    encoded_features = encoder.transform(X[categorical_features])
+    feature_names = encoder.get_feature_names_out(input_features=categorical_features)
+    encoded_df = pd.DataFrame(encoded_features, columns=feature_names)
+    # Remaining (non-categorical) columns first, encoded columns after
+    X_encoded = pd.concat([X.drop(columns=categorical_features), encoded_df], axis=1)
+    # Make predictions
+    prediction_proba = lgb1.predict_proba(X_encoded)
+    # Get SHAP values
+    explainer = shap.TreeExplainer(lgb1)
+    shap_values = explainer.shap_values(X_encoded)
+    # Extract prediction probability and display it to the user
+    probability = prediction_proba[0, 1]  # Assuming binary classification
+    st.subheader(f"The predicted probability of stroke is {probability}.")
+    st.subheader("IF you see result , higher than 0.3, we advice you to see a doctor")
+    st.header("Shap forceplot")
+    st.subheader("Features values impact on model made prediction")
+    # Draw the force plot with Matplotlib. show=False keeps SHAP from calling
+    # plt.show(), so the figure is still current when it is saved below.
+    # NOTE(review): indexing [1] assumes SHAP returns one entry per class
+    # for this model - confirm against the installed SHAP version.
+    shap.force_plot(
+        explainer.expected_value[1],
+        shap_values[1],
+        features=X_encoded.iloc[0, :],
+        matplotlib=True,
+        show=False,
+    )
+    # Save the figure to a BytesIO buffer and show it as an image
+    buf = io.BytesIO()
+    plt.savefig(buf, format="png", dpi=800)
+    buf.seek(0)
+    st.image(buf, width=1100)
+    # Close the figure so the next plot starts from a clean canvas and
+    # figures do not accumulate across Streamlit reruns.
+    plt.close()
+    # Summary plot of feature importance. SHAP only draws onto the current
+    # Matplotlib figure; it has to be handed to Streamlit to appear on the page.
+    shap.summary_plot(shap_values[1], X_encoded, show=False)
+    st.pyplot(plt.gcf())
+    plt.close()
+    # Interaction summary plot, rendered the same way
+    shap_interaction_values = explainer.shap_interaction_values(X_encoded)
+    shap.summary_plot(shap_interaction_values, X_encoded, show=False)
+    st.pyplot(plt.gcf())
+    plt.close()
+# Route to the dashboard chosen in the sidebar: the static model overview
+# (two pre-rendered images) or the interactive calculator in get_pred().
+if option == "Model information":
+    st.header("Light gradient boosting model")
+    st.subheader("First tree of light gradient boosting model and how it makes decisions")
+    st.image(r'lgbm_tree.png')
+    st.subheader("Shap values visualization of how features contribute to model prediction")
+    st.image(r'lgbm_model_shap_evaluation.png')
+elif option == "Stroke prediction":
+    get_pred()