Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// This file creates the data sets for figure 6 (Panel A, B)
// Author: eddie yu
// Date: 2023-10-16
/* Data sources:
-NETS
*/
* starting point for panel A is 2-main_analysis/es_county_aw (specification 2)
* starting point for panel B is 2-main_analysis/Fig_7_20180122_shading
clear all
set more off
snapshot erase _all
************************************
* Grabbing and organizing the data for panel A
************************************
// A.0. Get regional crosswalk read
use "$xwalk/cw_cty_czone.dta", clear
rename cty_fips pid
duplicates drop
tempfile cross
save "`cross'"
// A.0. Get link data
use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear
gen pr_emp = pr_link*total if industry_cd < 13
bys fips: egen tot_emp = total(total)
bys fips: egen tot_pr = total(pr_emp)
gen pr_l = tot_pr/tot_emp
keep pr_l fips
rename fips pid
rename pr_l pr_link_ind
duplicates drop
tempfile pr
save "`pr'"
use "$output_NETS/pr_link_est_county_d.dta", clear
rename fips pid
tempfile pr2
save "`pr2'"
// A.1. Get QCEW data
use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear
keep if strlen(industry_code) == 3
drop if industry_code == "101" | industry_code == "102"
order fips_state fips_county year industry_code
drop if fips_county == 0 & fips_state != 72
drop if fips_state == 72 & fips_county != 0
keep if own_code == 5 // 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private.
keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages
drop if fips_state == 78
collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code)
// A.3. Fill in gaps
gen pid = 1000*fips_state+fips_county
// A.6. Merge in pr link
merge m:1 pid using "`pr'"
keep if _merg == 3
drop _merge
merge m:1 pid using "`pr2'"
drop if year == .
drop if pr_link == .
drop _merge
rename pr_link pr_link_all
*replace pr_link_all = pr_link_ind
sum pr_link_al, d
drop if year == .
*collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything
egen pid2 = group(pid indus)
tsset pid2 year
tsfill, full
foreach var of varlist fips_state {
bysort pid2: egen temp = max(`var')
replace `var' = temp
drop temp
}
foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages {
replace `var' = 0 if `var' == .
}
// A.4. Growth rates. Base year: 1995
gen temp = annual_avg_emplvl if year == 1995
bysort pid2: egen base_emp = max(temp)
drop temp
gen emp_growth = (annual_avg_emplvl - base_emp)/base
replace emp_growth = 0 if emp_growth == .
// Income
gen inc = total_annual_wages
* /annual_avg_emplvl
gen temp = inc if year == 1995
bysort pid2: egen base_inc = max(temp)
drop temp
gen inc_growth = (inc - base_inc)/base_inc
replace inc_growth = 0 if inc_growth == .
// Estabs
gen estab = annual_avg_estabs
gen temp = estab if year == 1995
bysort pid2: egen base_estab = max(temp)
drop temp
gen estab_growth = (estab - base_estab)/base_estab
replace estab_growth = 0 if estab_growth == .
** For now, keep only balanced industries . Matt: can you fix it so we don't have to do this?
bys pid2: gen count = _N
keep if count == 23
drop count
drop if base_emp ==0
sum base_emp if year == 1995, d
gen wgt=base_emp/(r(N) * r(mean))
winsor wgt, p(.01) gen(wgt_w)
sum pr_link_i $pr_wgt if base_emp > 0, d
replace pr_link_i = pr_link_i/(r(p75)-r(p25)) // this line only changes
sum pr_link_al $pr_wgt if base_emp > 0, d
replace pr_link_al = pr_link_al/(r(p75)-r(p25))
tsset pid2 year
egen ind_st = group(ind fips_state)
*keep if base_emp > 400
winsor base_emp , g(w_emp_base) p(.025)
*replace base_emp = w_emp_base
sum base_emp, d
destring industry_code, replace
* keeping variables and saving
keep emp_growth pr_link_a pr_link_i year wgt pid industr fips_state base_emp
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
save "$data/Replication Data/figure6_data_A", replace
************************************
* Grabbing and organizing the data for panel B
************************************
// A.0. Get regional crosswalk read
use "$xwalk/cw_cty_czone.dta", clear
rename cty_fips pid
duplicates drop
tempfile cross
save "`cross'"
// A.0. Get link data
use "$output_NETS/pr_link_est_countyXindustry_d.dta", clear
gen pr_emp = pr_link*total if industry_cd < 13
bys fips: egen tot_emp = total(total)
bys fips: egen tot_pr = total(pr_emp)
gen pr_l = tot_pr/tot_emp
keep pr_l fips
rename fips pid
rename pr_l pr_link_ind
duplicates drop
tempfile pr
save "`pr'"
use "$output_NETS/pr_link_est_county_d.dta", clear
rename fips pid
tempfile pr2
save "`pr2'"
// A.1. Get QCEW data
use "$qcewdata/extract_qcew_1990_2012_naic3.dta", clear
keep if strlen(industry_code) == 3
drop if industry_code == "101" | industry_code == "102"
order fips_state fips_county year industry_code
drop if fips_county == 0 & fips_state != 72
drop if fips_state == 72 & fips_county != 0
keep if own_code == 5 // 0-total covered; 1 Federal; 2 State; 3 Local; 4 Intl Govt; 5 Private.
keep fips_state fips_county year industry_code annual_avg_estabs annual_avg_emplvl total_annual_wages
drop if fips_state == 78
collapse (sum) annual_avg_estabs (sum) annual_avg_emplvl (sum) total_annual_wages, by(fips_state fips_county year industry_code)
// A.3. Fill in gaps
gen pid = 1000*fips_state+fips_county
// A.6. Merge in pr link
merge m:1 pid using "`pr'"
keep if _merg == 3
drop _merge
merge m:1 pid using "`pr2'"
drop if year == .
drop if pr_link == .
drop _merge
rename pr_link pr_link_all
*replace pr_link_all = pr_link_ind
sum pr_link_al, d
drop if year == .
*collapse (rawsum) annual_avg_estabs (rawsum) annual_avg_emplvl (rawsum) total_annual_wages (mean) pr_link_ind (mean) pr_link_all [fw=total] , by( fips_state pid year industry_code) // Dan: this isn't actually collapsing anything
egen pid2 = group(pid indus)
tsset pid2 year
tsfill, full
foreach var of varlist fips_state {
bysort pid2: egen temp = max(`var')
replace `var' = temp
drop temp
}
foreach var of varlist annual_avg_estabs annual_avg_emplvl total_annual_wages {
replace `var' = 0 if `var' == .
}
// A.4. Growth rates. Base year: 1995
gen temp = annual_avg_emplvl if year == 1995
bysort pid2: egen base_emp = max(temp)
drop temp
gen emp_growth = (annual_avg_emplvl - base_emp)/base
replace emp_growth = 0 if emp_growth == .
// Income
gen inc = total_annual_wages
* /annual_avg_emplvl
gen temp = inc if year == 1995
bysort pid2: egen base_inc = max(temp)
drop temp
gen inc_growth = (inc - base_inc)/base_inc
replace inc_growth = 0 if inc_growth == .
// Estabs
gen estab = annual_avg_estabs
gen temp = estab if year == 1995
bysort pid2: egen base_estab = max(temp)
drop temp
gen estab_growth = (estab - base_estab)/base_estab
replace estab_growth = 0 if estab_growth == .
bys pid2: gen count = _N
keep if count == 23
drop count
drop if base_emp ==0
sum base_emp if year == 1995, d
gen wgt=base_emp/(r(N) * r(mean))
winsor wgt, p(.01) gen(wgt_w)
sum pr_link_i $pr_wgt if base_emp > 0, d
replace pr_link_i = pr_link_i/(r(p75)-r(p25))
sum pr_link_al $pr_wgt if base_emp > 0, d
replace pr_link_al = pr_link_al/(r(p75)-r(p25))
tsset pid2 year
egen ind_st = group(ind fips_state)
* keeping variables and saving
save "$data/Replication Data/figure6_data_B", replace