"""Streamlit dashboard for exploring Human Development Index (HDI) data.

The script loads the HDI CSV, keeps the yearly HDI columns from 2010
onwards, and renders one of several Plotly charts chosen in the sidebar.
"""

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# File and column names used throughout the script.
DATA_PATH = 'Human Development Index - Full.csv'
HDI_PREFIX = 'Human Development Index'
HDI_2010 = 'Human Development Index (2010)'
HDI_2021 = 'Human Development Index (2021)'
RANK_2021 = 'HDI Rank (2021)'
GROUPS = 'Human Development Groups'
FIRST_YEAR = 2010


def _year_of(column):
    """Return the year embedded in a column named like ``'... (2021)'``."""
    # The four characters just before the closing parenthesis are the year.
    return int(column[-5:-1])


# Load and preprocess the dataset
@st.cache_data
def load_data():
    """Read the HDI CSV and derive the frames used by the charts.

    Rows with a missing value in any kept column are dropped, the rows are
    sorted by 2021 HDI (highest first), and 'HDI Rank (2021)' is rewritten
    as the 1-based position in that ordering.

    Returns:
        A 4-tuple ``(df_hdi_sorted, df_years, df_rank, df_hdi_groups)``:
        the full sorted frame; the same rows with only 'Country' and the
        yearly HDI columns; 'Country' with the recomputed rank; and
        'Country' with its development group.
    """
    df = pd.read_csv(DATA_PATH)

    # Yearly HDI columns, restricted to FIRST_YEAR and later.
    hdi_columns = [
        col for col in df.columns
        if col.startswith(HDI_PREFIX) and _year_of(col) >= FIRST_YEAR
    ]
    columns_to_keep = ['ISO3', 'Country', GROUPS, RANK_2021] + hdi_columns

    # Chain non-mutating calls rather than calling dropna(inplace=True) on a
    # column selection, which pandas may flag as modifying a copy.  ISO3 is
    # still present while dropping incomplete rows and removed afterwards.
    df_hdi = df[columns_to_keep].dropna().drop(columns=['ISO3'])

    # Sort and create HDI Rank column
    df_hdi_sorted = df_hdi.sort_values(HDI_2021, ascending=False)
    df_hdi_sorted[RANK_2021] = range(1, len(df_hdi_sorted) + 1)

    # Create additional dataframes
    df_years = df_hdi_sorted.drop(columns=[RANK_2021, GROUPS])
    df_rank = df_hdi_sorted[['Country', RANK_2021]]
    df_hdi_groups = df_hdi_sorted[['Country', GROUPS]]

    return df_hdi_sorted, df_years, df_rank, df_hdi_groups


# Chart builders.  Each takes the chart title and returns a Plotly figure.
# They read the module-level frames assigned from load_data() further down,
# which happens before any builder is called.

def _top_10_chart(title):
    """Bar chart of the ten highest-HDI countries in 2021."""
    return px.bar(df_years.head(10), x='Country', y=HDI_2021, title=title,
                  color=HDI_2021, color_continuous_scale='Viridis')


def _groups_pie_chart(title):
    """Pie chart of how many countries fall in each development group."""
    return px.pie(df_hdi_groups, names=GROUPS, title=title)


def _trends_chart(title):
    """Line chart of yearly HDI for the five top-ranked countries."""
    top_5 = df_rank.head()['Country'].tolist()

    # Order the year columns chronologically so each line runs left to right.
    year_columns = sorted(
        (col for col in df_years.columns if col.startswith(HDI_PREFIX)),
        key=_year_of,
    )

    # Reshape to one row per (country, year): a trend needs the year on the
    # x-axis and the HDI value on the y-axis, with one line per country.
    trends = df_years[df_years['Country'].isin(top_5)].melt(
        id_vars='Country',
        value_vars=year_columns,
        var_name='Year',
        value_name='HDI',
    )
    trends['Year'] = trends['Year'].map(_year_of)

    return px.line(trends, x='Year', y='HDI', color='Country', title=title)


def _bottom_10_chart(title):
    """Bar chart of the ten lowest-HDI countries in 2021."""
    return px.bar(df_years.tail(10), x='Country', y=HDI_2021, title=title)


def _improvement_chart(title):
    """Bar chart of the ten largest HDI gains between 2010 and 2021."""
    # assign() returns a new frame, so the shared df_years is not modified.
    changes = df_years.assign(
        HDI_change=df_years[HDI_2021] - df_years[HDI_2010]
    )
    return px.bar(changes.nlargest(10, 'HDI_change'), x='Country',
                  y='HDI_change', title=title)


def _distribution_chart(title):
    """Box plot of all 2021 HDI values."""
    return px.box(df_years, y=HDI_2021, title=title)


def _world_map_chart(title):
    """Choropleth of 2021 HDI, matching countries by name."""
    return px.choropleth(df_years, locations='Country',
                         locationmode='country names', color=HDI_2021,
                         title=title, color_continuous_scale='Viridis')


def _comparison_chart(title):
    """Scatter of 2010 vs 2021 HDI with a diagonal reference line."""
    fig = px.scatter(df_years, x=HDI_2010, y=HDI_2021, hover_name='Country',
                     title=title)
    # Points above this diagonal have a higher HDI in 2021 than in 2010.
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines',
                             name='No Change Line'))
    return fig


def _by_groups_chart(title):
    """Box plot of 2021 HDI split by development group."""
    return px.box(df_hdi_sorted, x=GROUPS, y=HDI_2021, title=title)


def _sunburst_chart(title):
    """Sunburst of HDI band -> development group -> country."""
    # Bucket the 2021 HDI into four labelled bands for the innermost ring.
    df_sunburst = df_hdi_sorted.assign(
        HDI_2021=pd.cut(df_hdi_sorted[HDI_2021],
                        bins=[0, 0.55, 0.7, 0.8, 1],
                        labels=['Low', 'Medium', 'High', 'Very High'])
    )
    fig = px.sunburst(df_sunburst,
                      path=['HDI_2021', GROUPS, 'Country'],
                      values=HDI_2021,
                      color='HDI_2021',
                      color_discrete_map={'Low': 'red', 'Medium': 'orange',
                                          'High': 'lightgreen',
                                          'Very High': 'darkgreen'},
                      title=title)
    fig.update_traces(textinfo="label+percent entry")
    return fig


# Sidebar option -> (heading used as both subheader and chart title, builder).
# Insertion order is the order shown in the select box.
CHARTS = {
    'Top 10 Countries':
        ('Top 10 Countries by HDI (2021)', _top_10_chart),
    'HDI Groups Distribution':
        ('Distribution of Countries by HDI Groups', _groups_pie_chart),
    'HDI Trends':
        ('HDI Trends for Top 5 Countries', _trends_chart),
    'Bottom 10 Countries':
        ('Bottom 10 Countries by HDI (2021)', _bottom_10_chart),
    'HDI Improvement':
        ('Top 10 Countries with Highest HDI Improvement (2010-2021)',
         _improvement_chart),
    'HDI Distribution':
        ('Distribution of HDI Values (2021)', _distribution_chart),
    'World Map':
        ('World Map of Human Development Index (2021)', _world_map_chart),
    'HDI Comparison':
        ('HDI Comparison: 2010 vs 2021', _comparison_chart),
    'HDI by Development Groups':
        ('HDI Distribution by Development Groups (2021)', _by_groups_chart),
    'HDI Sunburst':
        ('HDI Distribution by Groups and Top Countries (2021)',
         _sunburst_chart),
}

# Load the data
df_hdi_sorted, df_years, df_rank, df_hdi_groups = load_data()

# Streamlit app
st.title('Human Development Index Analysis')

# Sidebar
st.sidebar.header('Visualization Options')
chart_type = st.sidebar.selectbox('Select Chart Type', list(CHARTS))

# Main content: show the heading first, then build and render the figure.
heading, build_chart = CHARTS[chart_type]
st.subheader(heading)
st.plotly_chart(build_chart(heading))

# Add more information or text as needed
st.markdown(
    " This app provides various visualizations of the Human Development"
    " Index (HDI) data. Use the sidebar to select different chart types and"
    " explore the data. Data source: [Human Development Index Dataset]"
    "(https://www.kaggle.com/datasets/iamsouravbanerjee/"
    "human-development-index-dataset) "
)