Data Analysis with Python Projects - Demographic Data Analyzer - Python

Your code so far

import pandas as pd

# Demographic data analysis script.
#
# Loads the dataset and derives a handful of summary statistics from the
# 'race', 'sex', 'age', 'education', 'salary', 'hours-per-week',
# 'native-country' and 'occupation' columns, then prints them.
#
# NOTE: all string delimiters must be plain ASCII quotes (' or "); the
# typographic quotes that rich-text editors and forums substitute are
# rejected by the Python parser.

# Path is relative to the working directory; change it if the dataset
# lives elsewhere.
data = pd.read_csv('demographic_data.csv')

# Boolean mask of rows whose salary is '>50K'; reused by several metrics below.
is_rich = data['salary'] == '>50K'

# Number of rows per distinct value of 'race'.
race_counts = data['race'].value_counts()

# Mean of 'age' over rows where 'sex' is 'Male'.
average_age_men = data[data['sex'] == 'Male']['age'].mean()

# The mean of a boolean Series is the fraction of True values.
bachelors_percentage = (data['education'] == 'Bachelors').mean() * 100

# "Advanced education" here means Bachelors, Masters or Doctorate.
advanced_education = data['education'].isin(['Bachelors', 'Masters', 'Doctorate'])

# Share of '>50K' rows among those with advanced education.
higher_education_rich = (
    data[advanced_education & is_rich].shape[0]
    / data[advanced_education].shape[0]
    * 100
)

# Share of '>50K' rows among those without advanced education.
lower_education_rich = (
    data[~advanced_education & is_rich].shape[0]
    / data[~advanced_education].shape[0]
    * 100
)

# Smallest value found in 'hours-per-week'.
min_work_hours = data['hours-per-week'].min()

# Despite the name, this is the DataFrame of rows at the minimum hours,
# not a count.
num_min_workers = data[data['hours-per-week'] == min_work_hours]
rich_percentage = (
    num_min_workers[num_min_workers['salary'] == '>50K'].shape[0]
    / num_min_workers.shape[0]
) * 100

# Per-country ratio of '>50K' rows to all rows. Countries with no '>50K'
# rows are absent from the numerator and become NaN after index alignment;
# idxmax() skips NaN by default.
highest_earning_country = (
    data[is_rich]['native-country'].value_counts()
    / data['native-country'].value_counts()
).idxmax()

from_top_country = data['native-country'] == highest_earning_country
highest_earning_country_percentage = (
    data[from_top_country & is_rich].shape[0]
    / data[from_top_country].shape[0]
) * 100

# Most frequent 'occupation' among '>50K' rows whose native country is India.
top_IN_occupation = (
    data[(data['native-country'] == 'India') & is_rich]['occupation']
    .value_counts()
    .idxmax()
)

# Report: percentages and the average age are rounded to one decimal place.
print("Race counts:")
print(race_counts)
print("\nAverage age of men:", round(average_age_men, 1))
print("\nPercentage of people with Bachelor's degree:", round(bachelors_percentage, 1))
print("\nPercentage of people with advanced education earning >50K:", round(higher_education_rich, 1))
print("\nPercentage of people without advanced education earning >50K:", round(lower_education_rich, 1))
print("\nMinimum number of hours worked per week:", min_work_hours)
print("\nPercentage of people working min hours per week earning >50K:", round(rich_percentage, 1))
print("\nCountry with the highest percentage earning >50K:", highest_earning_country)
print("Percentage:", round(highest_earning_country_percentage, 1))
print("\nMost popular occupation for >50K earners in India:", top_IN_occupation)