| |
| """Untitled8.ipynb |
| |
| Automatically generated by Colab. |
| |
| Original file is located at |
| https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u |
| """ |
|
|
| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import plotly.express as px |
|
|
| |
# Browser-tab title/icon and a full-width page layout for the whole app.
# NOTE(review): the icon text looks like a mis-decoded emoji — confirm against
# the original notebook before changing it.
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="π»",
    layout="wide",
)
|
|
@st.cache_data
def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Reads ``stackoverflow_survey_single_response.txt`` as CSV, keeps only rows
    whose ``converted_comp_yearly`` is above 1000, converts the experience and
    age columns to numbers (filling gaps with the column median), and derives
    three columns used by the dashboard: ``experience_level``, a bucketed
    ``country`` and a labelled ``education_level``.

    Returns:
        pandas.DataFrame: the cleaned data, or an empty DataFrame when loading
        or preprocessing fails (the failure is reported via ``st.error``).
    """
    try:
        df = pd.read_csv('stackoverflow_survey_single_response.txt')

        # A missing salary compares False against 1000, so one mask drops both
        # NaN and implausibly small values. Copying once, after all row
        # filtering, keeps the column assignments below from writing to a
        # filtered slice (SettingWithCopyWarning).
        df_clean = df[df['converted_comp_yearly'] > 1000].copy()

        # Non-numeric entries become NaN and are then replaced by the median.
        for col in ['years_code', 'years_code_pro', 'age']:
            numeric = pd.to_numeric(df_clean[col], errors='coerce')
            df_clean[col] = numeric.fillna(numeric.median())

        # include_lowest puts exactly 0 years into the first bucket, and the
        # open upper edge keeps very long careers in "Expert" instead of NaN.
        df_clean['experience_level'] = pd.cut(
            df_clean['years_code_pro'],
            bins=[0, 2, 5, 10, np.inf],
            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
            include_lowest=True,
        )

        # Collapse every country outside this list (including missing values)
        # into a single 'Other' bucket.
        top_countries = [
            'United States of America',
            'United Kingdom of Great Britain and Northern Ireland',
            'Germany', 'India', 'Canada', 'France', 'Australia',
        ]
        df_clean['country'] = df_clean['country'].where(
            df_clean['country'].isin(top_countries), 'Other'
        )

        # NOTE(review): assumes ed_level holds the integer codes 1-5 — confirm
        # against the data file. Anything unmapped is labelled 'Other'.
        education_map = {
            1: 'Less than Bachelor',
            2: "Bachelor's Degree",
            3: "Master's Degree",
            4: 'Doctoral Degree',
            5: 'Professional Degree',
        }
        df_clean['education_level'] = (
            df_clean['ed_level'].map(education_map).fillna('Other')
        )

        return df_clean

    except Exception as e:
        # Deliberate best-effort boundary: surface the problem in the UI and
        # let the caller handle the empty frame.
        st.error(f"Error loading data: {str(e)}")
        return pd.DataFrame()
|
|
def _filter_by_sidebar(df):
    """Render the sidebar filter widgets and return the rows of *df* they select."""
    # NOTE(review): the leading 'π…' glyphs in the UI strings of this block
    # look like mis-decoded emoji — confirm against the original notebook.
    st.sidebar.header("π Filter Data")

    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3],
    )

    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels,
    )

    exp_floor = int(df['years_code_pro'].min())
    exp_ceiling = int(min(df['years_code_pro'].max(), 40))
    # The slider rejects a default outside [min_value, max_value], so clamp the
    # preferred (0, 15) window to what the data actually spans.
    default_range = (
        min(max(0, exp_floor), exp_ceiling),
        min(max(15, exp_floor), exp_ceiling),
    )
    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=exp_floor,
        max_value=exp_ceiling,
        value=default_range,
    )

    mask = (
        df['country'].isin(selected_countries)
        & df['education_level'].isin(selected_education)
        & (df['years_code_pro'] >= min_exp)
        & (df['years_code_pro'] <= max_exp)
    )
    return df[mask]


def _show_key_metrics(filtered_df):
    """Show median, mean, row count and min-max salary for *filtered_df*."""
    st.header("π Key Metrics")
    salaries = filtered_df['converted_comp_yearly']

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")
    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")
    with col3:
        st.metric("Sample Size", f"{len(filtered_df):,}")
    with col4:
        st.metric("Salary Range", f"${salaries.min():,.0f} - ${salaries.max():,.0f}")


def _show_salary_charts(filtered_df):
    """Draw the four salary charts (country, education, experience scatter, experience level)."""
    st.header("π Salary Analysis")

    st.subheader("π Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'},
    )
    st.plotly_chart(fig1, use_container_width=True)

    st.subheader("π Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level",
    )
    st.plotly_chart(fig2, use_container_width=True)

    st.subheader("π Salary vs Experience")
    # NOTE(review): plotly's "lowess" trendline presumably needs the optional
    # statsmodels package at runtime — confirm it is installed.
    fig3 = px.scatter(
        filtered_df,
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
        trendline="lowess",
    )
    st.plotly_chart(fig3, use_container_width=True)

    st.subheader("π¨βπ» Salary by Experience Level")
    # experience_level is categorical; observed=False keeps every level on the
    # axis (as before) without relying on the deprecated implicit default.
    exp_stats = (
        filtered_df.groupby('experience_level', observed=False)['converted_comp_yearly']
        .median()
    )
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level",
    )
    st.plotly_chart(fig4, use_container_width=True)


def _show_data_table(filtered_df):
    """Offer an optional table of the filtered rows, highest salary first."""
    st.header("π Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = [
            'country', 'education_level', 'experience_level',
            'years_code_pro', 'converted_comp_yearly',
        ]
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True,
        )


def main():
    """Render the Developer Salary Explorer page.

    Loads the survey data, applies the sidebar filters, and then shows the key
    metrics, the salary charts and an optional data table. Stops early with a
    message when no data could be loaded or when the filters match no rows.
    """
    st.title("π» Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    df = load_data()
    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    filtered_df = _filter_by_sidebar(df)

    # Bail out before any statistics are computed: on an empty selection the
    # metrics would otherwise be rendered as "$nan".
    if filtered_df.empty:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    _show_key_metrics(filtered_df)
    _show_salary_charts(filtered_df)
    _show_data_table(filtered_df)
|
|
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()