Script to calculate expenditure growth and plot data
importpandasaspdimportplotly.expressaspxdf=pd.read_csv('bea_consumption_data.csv')# Read the whole dataset
df=df[df['lowest']==1]# Keep only rows at the lowest level of detail (no high-level categories)
# df = df[df['level'] == 4] # An alternative to look at a medium-level of detail
df['ExpendYear']=df['Expend'].groupby(df['year']).transform('sum')# Find total expenditure in a year
df['ExpendShare']=df['Expend']/df['ExpendYear']# Product expenditure share
df=df.sort_values(by=['expendcode','year'])# Sort everything by expenditure code and year first
df['lagExpendYear']=df.groupby('expendcode')['ExpendYear'].shift(1)# Get *prior* year *total* expenditure
df['GrowthExpendYear']=(df['ExpendYear']-df['lagExpendYear'])/df['lagExpendYear']# % growth in total expend
df['lagPrice']=df.groupby('expendcode')['Price'].shift(1)# Get *prior* year price for the product
df['GrowthPrice']=(df['Price']-df['lagPrice'])/df['lagPrice']# % growth in product price
df['GrowthAfford']=df['GrowthExpendYear']-df['GrowthPrice']# Affordability growth for the product
df['lagExpendShare']=df.groupby('expendcode')['ExpendShare'].shift(1)# Get *prior* year price for the product
df['wtdGrowthAfford']=df['lagExpendShare']*df['GrowthAfford']# Product contribution to total growth
df['GrowthYear']=df['wtdGrowthAfford'].groupby(df['year']).transform('sum')# Calculate total growth for the year
# by summing the weighted affordability growth across all the products
df.to_csv('bea_prepared_data.csv',index=False)# Save this limited dataset
# Do the same calculation of total growth into a smaller data frame
growth_df=df.groupby('year')['wtdGrowthAfford'].sum().reset_index()growth_df.to_csv('bea_growth_data.csv',index=False)# Save this limited dataset
# Create a crude figure of overall affordability growth
fig=px.line(growth_df,x="year",y="wtdGrowthAfford")fig.show()# We have a *lot* of product codes to work with
counts=df['expendcode'].nunique()print('Number of unique expenditure codes:',counts)# Create a subset of data for the 4-8 products you choose
product_codes=['DLUGRC','DMCYRC','DMLTRC','DORSRC']# You select a few of these codes to look at
subset_df=df[df['expendcode'].isin(product_codes)]# Get only the data related to those expenditure codes
subset_df.to_csv('bea_subset_data.csv',index=False)# Save this limited dataset
# Create a crude figure showing affordability growth for your selected serise
fig=px.line(subset_df,x="year",y="GrowthAfford",color='serieslabel')fig.show()# Reshape the data so we can do a fancier figure with Datawrapper
reshaped_df=subset_df.pivot(index='year',columns='serieslabel',values='GrowthAfford')# reshape how the data are stored
reshaped_df=reshaped_df.reset_index()# this is a weird thing we need to do
reshaped_df.to_csv('bea_reshaped_data.csv',index=False)# save the reshaped data