Skip to content
Snippets Groups Projects
figure6_clean_pr_link_raw.do 7.28 KiB
Newer Older
// This file creates the data sets for figure 6 (Panel A, B)
// Author: eddie yu
// Date: 2023-10-16

/* Data sources:
-NETS
*/

* starting point for panel A is 2-main_analysis/es_county_aw (specification 2)
* starting point for panel B is 2-main_analysis/Fig_7_20180122_shading

clear all
set more off 
snapshot erase _all

************************************
* Grabbing and organizing the data for panel A
************************************
// A.0. Get regional crosswalk read 
use "$xwalk/cw_cty_czone.dta", clear
rename cty_fips pid 
duplicates drop 
tempfile cross
save "`cross'"

// A.0. Get link data 
use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear 
gen pr_emp = pr_link*total if industry_cd < 13
bys fips: egen tot_emp = total(total) 
bys fips: egen tot_pr = total(pr_emp)

gen pr_l = tot_pr/tot_emp 
keep pr_l fips
rename  fips pid
rename pr_l pr_link_ind
duplicates drop 
tempfile pr 
save "`pr'" 

use "$output_NETS/pr_link_est_county_d.dta", clear 
rename  fips pid
tempfile pr2 
save "`pr2'" 

// A.1. Get QCEW data 
use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear
keep if strlen(industry_code) == 3
drop if industry_code == "101" | industry_code == "102"

order fips_state fips_county year industry_code
drop if fips_county == 0 & fips_state != 72
drop if fips_state == 72 & fips_county != 0
keep if own_code == 5		// 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private.
keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages
drop if fips_state == 78
collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code)

// A.3. Fill in gaps
gen pid = 1000*fips_state+fips_county

// A.6. Merge in pr link 
merge m:1 pid using "`pr'"
keep if _merg == 3
drop _merge 

merge m:1 pid using "`pr2'"
drop if year == . 
drop if pr_link == . 
drop _merge 
rename pr_link pr_link_all

*replace pr_link_all = pr_link_ind

sum pr_link_al, d 

drop if year == . 

*collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything

egen pid2 = group(pid indus)

tsset pid2 year
tsfill, full

foreach var of varlist fips_state  {
bysort pid2: egen temp = max(`var')
replace `var' = temp
drop temp
}

foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages {
replace `var' = 0 if `var' == .
}

// A.4.  Growth rates. Base year: 1995
gen temp = annual_avg_emplvl if year == 1995
bysort pid2: egen base_emp = max(temp)
drop temp

gen emp_growth = (annual_avg_emplvl - base_emp)/base
replace emp_growth = 0 if emp_growth == . 


// Income 
gen inc = total_annual_wages
* /annual_avg_emplvl
gen temp = inc if year == 1995
bysort pid2: egen base_inc = max(temp)
drop temp

gen inc_growth = (inc - base_inc)/base_inc
replace inc_growth = 0 if inc_growth == . 

// Estabs 
gen estab = annual_avg_estabs
gen temp = estab if year == 1995
bysort pid2: egen base_estab = max(temp)
drop temp

gen estab_growth = (estab - base_estab)/base_estab
replace estab_growth = 0 if estab_growth == . 

** For now, keep only balanced industries . Matt: can you fix it so we don't have to do this? 
bys pid2: gen count = _N
keep if count == 23 
drop count 

drop if base_emp ==0 

sum base_emp if year == 1995, d
gen wgt=base_emp/(r(N) * r(mean))
winsor wgt, p(.01) gen(wgt_w)

sum pr_link_i $pr_wgt if base_emp > 0, d 
replace pr_link_i = pr_link_i/(r(p75)-r(p25)) // this line only changes

sum pr_link_al  $pr_wgt if base_emp > 0, d 
replace pr_link_al = pr_link_al/(r(p75)-r(p25))

tsset pid2 year 
egen ind_st = group(ind fips_state)

*keep if base_emp > 400  
winsor base_emp , g(w_emp_base) p(.025)
*replace base_emp = w_emp_base
sum base_emp, d 
destring industry_code, replace


* keeping variables and saving 
keep emp_growth pr_link_a pr_link_i year wgt pid industr fips_state base_emp
save "$data/Replication Data/figure6_data_A", replace


************************************
* Grabbing and organizing the data for panel B
************************************
// A.0. Get regional crosswalk read 
use "$xwalk/cw_cty_czone.dta", clear
rename cty_fips pid 
duplicates drop 
tempfile cross
save "`cross'"

// A.0. Get link data 
use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear 
gen pr_emp = pr_link*total if industry_cd < 13
bys fips: egen tot_emp = total(total) 
bys fips: egen tot_pr = total(pr_emp)

gen pr_l = tot_pr/tot_emp 
keep pr_l fips
rename  fips pid
rename pr_l pr_link_ind
duplicates drop 
tempfile pr 
save "`pr'"

use "$output_NETS/pr_link_est_county_d.dta", clear 
rename  fips pid
tempfile pr2 
save "`pr2'" 

// A.1. Get QCEW data 
use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear
keep if strlen(industry_code) == 3
drop if industry_code == "101" | industry_code == "102"

order fips_state fips_county year industry_code
drop if fips_county == 0 & fips_state != 72
drop if fips_state == 72 & fips_county != 0
keep if own_code == 5		// 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private.
keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages
drop if fips_state == 78

collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code)

// A.3. Fill in gaps
gen pid = 1000*fips_state+fips_county

// A.6. Merge in pr link 
merge m:1 pid using "`pr'"
keep if _merg == 3
drop _merge 

merge m:1 pid using "`pr2'"
drop if year == . 
drop if pr_link == . 
drop _merge 
rename pr_link pr_link_all

*replace pr_link_all = pr_link_ind

sum pr_link_al, d 

drop if year == . 

*collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything

egen pid2 = group(pid indus)

tsset pid2 year
tsfill, full

foreach var of varlist fips_state  {
bysort pid2: egen temp = max(`var')
replace `var' = temp
drop temp
}

foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages {
replace `var' = 0 if `var' == .
}

// A.4.  Growth rates. Base year: 1995
gen temp = annual_avg_emplvl if year == 1995
bysort pid2: egen base_emp = max(temp)
drop temp

gen emp_growth = (annual_avg_emplvl - base_emp)/base
replace emp_growth = 0 if emp_growth == . 


// Income 
gen inc = total_annual_wages
* /annual_avg_emplvl
gen temp = inc if year == 1995
bysort pid2: egen base_inc = max(temp)
drop temp

gen inc_growth = (inc - base_inc)/base_inc
replace inc_growth = 0 if inc_growth == . 

// Estabs 
gen estab = annual_avg_estabs
gen temp = estab if year == 1995
bysort pid2: egen base_estab = max(temp)
drop temp

gen estab_growth = (estab - base_estab)/base_estab
replace estab_growth = 0 if estab_growth == . 

bys pid2: gen count = _N
keep if count == 23 
drop count 

drop if base_emp ==0 

sum base_emp if year == 1995, d
gen wgt=base_emp/(r(N) * r(mean))
winsor wgt, p(.01) gen(wgt_w)

sum pr_link_i $pr_wgt if base_emp > 0, d 
replace pr_link_i = pr_link_i/(r(p75)-r(p25))

sum pr_link_al  $pr_wgt if base_emp > 0, d 
replace pr_link_al = pr_link_al/(r(p75)-r(p25))

tsset pid2 year 
egen ind_st = group(ind fips_state)

* keeping variables and saving 
save "$data/Replication Data/figure6_data_B", replace