rickystanley76 committed on
Commit
fc15bec
1 Parent(s): 17cf5f5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from pandas.api.types import (
4
+ is_categorical_dtype,
5
+ is_datetime64_any_dtype,
6
+ is_numeric_dtype,
7
+ is_object_dtype,
8
+ )
9
+
10
# Page heading shown at the top of the Streamlit app.
st.title("Telco Customer Churn APP")

# Short description rendered under the title (typo "filetering" corrected).
st.write(
    """This app shows the filtering of the dataframe using streamlit APP
"""
)
16
+
17
+
18
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Add a filtering UI on top of a dataframe and return the filtered rows.

    A checkbox toggles the UI. When it is unchecked the input dataframe is
    returned unchanged. Otherwise the viewer picks columns to filter on and
    gets one widget per column, chosen from the column's contents:

    * categorical dtype or fewer than 10 unique values -> multiselect
    * numeric -> range slider
    * datetime -> date range picker
    * anything else -> substring / regex text box

    Filters are applied cumulatively in the order the columns were selected.

    Args:
        df (pd.DataFrame): Original dataframe. It is not mutated.

    Returns:
        pd.DataFrame: Filtered dataframe (the original object when the
        "Add filters" checkbox is unchecked).
    """
    # Local import: only needed to validate the viewer-supplied regex.
    import re

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    # Work on a copy so the caller's dataframe is left untouched.
    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except (ValueError, TypeError, OverflowError):
                # Best effort: the column is not date-like, keep it as is.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")
            # isinstance check replaces the deprecated is_categorical_dtype().
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)
            # Treat columns with < 10 unique values as categorical
            if is_categorical or df[column].nunique() < 10:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                # This branch is only reached with >= 10 distinct values,
                # so _max > _min and the step is strictly positive.
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # While the viewer is still picking the range only one date
                # is returned; wait until both ends are available.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The picker yields whole days (midnight). Use a half-open
                    # interval ending at the start of the following day so
                    # rows with a time later on the end date are not dropped.
                    end_exclusive = end_date + pd.Timedelta(days=1)
                    in_range = (df[column] >= start_date) & (
                        df[column] < end_exclusive
                    )
                    df = df.loc[in_range]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        re.compile(user_text_input)
                    except re.error as exc:
                        # Report a bad pattern inline instead of crashing the
                        # app; the column is left unfiltered.
                        right.error(f"Invalid regular expression: {exc}")
                    else:
                        # na=False: missing values never match, and the mask
                        # stays purely boolean so indexing does not raise.
                        df = df[
                            df[column].str.contains(user_text_input, na=False)
                        ]

    return df
91
+
92
+
93
# Load the churn dataset from a CSV expected in the working directory, then
# display it with the optional filter UI applied.
df = pd.read_csv("Telco-Customer-Churn.csv")
filtered_df = filter_dataframe(df)
st.dataframe(filtered_df)