// This file creates the data sets for figure 6 (Panel A, B) // Author: eddie yu // Date: 2023-10-16 /* Data sources: -NETS */ * starting point for panel A is 2-main_analysis/es_county_aw (specification 2) * starting point for panel B is 2-main_analysis/Fig_7_20180122_shading clear all set more off snapshot erase _all ************************************ * Grabbing and organizing the data for panel A ************************************ // A.0. Get regional crosswalk read use "$xwalk/cw_cty_czone.dta", clear rename cty_fips pid duplicates drop tempfile cross save "`cross'" // A.0. Get link data use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear gen pr_emp = pr_link*total if industry_cd < 13 bys fips: egen tot_emp = total(total) bys fips: egen tot_pr = total(pr_emp) gen pr_l = tot_pr/tot_emp keep pr_l fips rename fips pid rename pr_l pr_link_ind duplicates drop tempfile pr save "`pr'" use "$output_NETS/pr_link_est_county_d.dta", clear rename fips pid tempfile pr2 save "`pr2'" // A.1. Get QCEW data use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear keep if strlen(industry_code) == 3 drop if industry_code == "101" | industry_code == "102" order fips_state fips_county year industry_code drop if fips_county == 0 & fips_state != 72 drop if fips_state == 72 & fips_county != 0 keep if own_code == 5 // 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private. keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages drop if fips_state == 78 collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code) // A.3. Fill in gaps gen pid = 1000*fips_state+fips_county // A.6. Merge in pr link merge m:1 pid using "`pr'" keep if _merg == 3 drop _merge merge m:1 pid using "`pr2'" drop if year == . drop if pr_link == . drop _merge rename pr_link pr_link_all *replace pr_link_all = pr_link_ind sum pr_link_al, d drop if year == . *collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything egen pid2 = group(pid indus) tsset pid2 year tsfill, full foreach var of varlist fips_state { bysort pid2: egen temp = max(`var') replace `var' = temp drop temp } foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages { replace `var' = 0 if `var' == . } // A.4. Growth rates. Base year: 1995 gen temp = annual_avg_emplvl if year == 1995 bysort pid2: egen base_emp = max(temp) drop temp gen emp_growth = (annual_avg_emplvl - base_emp)/base replace emp_growth = 0 if emp_growth == . // Income gen inc = total_annual_wages * /annual_avg_emplvl gen temp = inc if year == 1995 bysort pid2: egen base_inc = max(temp) drop temp gen inc_growth = (inc - base_inc)/base_inc replace inc_growth = 0 if inc_growth == . // Estabs gen estab = annual_avg_estabs gen temp = estab if year == 1995 bysort pid2: egen base_estab = max(temp) drop temp gen estab_growth = (estab - base_estab)/base_estab replace estab_growth = 0 if estab_growth == . ** For now, keep only balanced industries . Matt: can you fix it so we don't have to do this? bys pid2: gen count = _N keep if count == 23 drop count drop if base_emp ==0 sum base_emp if year == 1995, d gen wgt=base_emp/(r(N) * r(mean)) winsor wgt, p(.01) gen(wgt_w) sum pr_link_i $pr_wgt if base_emp > 0, d replace pr_link_i = pr_link_i/(r(p75)-r(p25)) // this line only changes sum pr_link_al $pr_wgt if base_emp > 0, d replace pr_link_al = pr_link_al/(r(p75)-r(p25)) tsset pid2 year egen ind_st = group(ind fips_state) *keep if base_emp > 400 winsor base_emp , g(w_emp_base) p(.025) *replace base_emp = w_emp_base sum base_emp, d destring industry_code, replace * keeping variables and saving keep emp_growth pr_link_a pr_link_i year wgt pid industr fips_state base_emp save "$data/Replication Data/figure6_data_A", replace ************************************ * Grabbing and organizing the data for panel B ************************************ // A.0. Get regional crosswalk read use "$xwalk/cw_cty_czone.dta", clear rename cty_fips pid duplicates drop tempfile cross save "`cross'" // A.0. Get link data use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear gen pr_emp = pr_link*total if industry_cd < 13 bys fips: egen tot_emp = total(total) bys fips: egen tot_pr = total(pr_emp) gen pr_l = tot_pr/tot_emp keep pr_l fips rename fips pid rename pr_l pr_link_ind duplicates drop tempfile pr save "`pr'" use "$output_NETS/pr_link_est_county_d.dta", clear rename fips pid tempfile pr2 save "`pr2'" // A.1. Get QCEW data use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear keep if strlen(industry_code) == 3 drop if industry_code == "101" | industry_code == "102" order fips_state fips_county year industry_code drop if fips_county == 0 & fips_state != 72 drop if fips_state == 72 & fips_county != 0 keep if own_code == 5 // 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private. keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages drop if fips_state == 78 collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code) // A.3. Fill in gaps gen pid = 1000*fips_state+fips_county // A.6. Merge in pr link merge m:1 pid using "`pr'" keep if _merg == 3 drop _merge merge m:1 pid using "`pr2'" drop if year == . drop if pr_link == . drop _merge rename pr_link pr_link_all *replace pr_link_all = pr_link_ind sum pr_link_al, d drop if year == . *collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything egen pid2 = group(pid indus) tsset pid2 year tsfill, full foreach var of varlist fips_state { bysort pid2: egen temp = max(`var') replace `var' = temp drop temp } foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages { replace `var' = 0 if `var' == . } // A.4. Growth rates. Base year: 1995 gen temp = annual_avg_emplvl if year == 1995 bysort pid2: egen base_emp = max(temp) drop temp gen emp_growth = (annual_avg_emplvl - base_emp)/base replace emp_growth = 0 if emp_growth == . // Income gen inc = total_annual_wages * /annual_avg_emplvl gen temp = inc if year == 1995 bysort pid2: egen base_inc = max(temp) drop temp gen inc_growth = (inc - base_inc)/base_inc replace inc_growth = 0 if inc_growth == . // Estabs gen estab = annual_avg_estabs gen temp = estab if year == 1995 bysort pid2: egen base_estab = max(temp) drop temp gen estab_growth = (estab - base_estab)/base_estab replace estab_growth = 0 if estab_growth == . bys pid2: gen count = _N keep if count == 23 drop count drop if base_emp ==0 sum base_emp if year == 1995, d gen wgt=base_emp/(r(N) * r(mean)) winsor wgt, p(.01) gen(wgt_w) sum pr_link_i $pr_wgt if base_emp > 0, d replace pr_link_i = pr_link_i/(r(p75)-r(p25)) sum pr_link_al $pr_wgt if base_emp > 0, d replace pr_link_al = pr_link_al/(r(p75)-r(p25)) tsset pid2 year egen ind_st = group(ind fips_state) * keeping variables and saving save "$data/Replication Data/figure6_data_B", replace