
. 
. 
. ********************************************************************************
. * Section 1: Clean Glassdoor OC Measures
. ********************************************************************************
. 
. * ── Interview-based measures ─────────────────────────────────────────────────
. 
. import delimited "Raw/final_df_interview.csv", clear
(encoding automatically selected: ISO-8859-1)
(6 vars, 13,303 obs)

. drop v1

. drop if missing(sim2chatgpt_avg)
(401 observations deleted)

. egen std_int_seed=std(sim_avg)

. egen std_int_chatgpt=std(sim2chatgpt_avg)

. egen std_int_avg=rowmean(std_int_seed std_int_chatgpt)

. xtile pct_int_seed=std_int_seed, n(100)

. xtile pct_int_chatgpt=std_int_chatgpt, n(100)

. xtile pct_int_avg=std_int_avg, n(100)

. save "Intermediate/final_df_interview.dta", replace
file Intermediate/final_df_interview.dta saved

. 
. * ── Review-based measures ────────────────────────────────────────────────────
. 
. import delimited "Raw/final_df.csv", clear
(encoding automatically selected: ISO-8859-1)
(9 vars, 16,743 obs)

. 
. drop pros_sim_sum cons_sim_sum

. gen net_seed=pros_sim_avg-cons_sim_avg

. gen net_chatgpt=pros_sim2chatgpt_avg-cons_sim2chatgpt_avg
(240 missing values generated)

. 
. egen std_net_chatgpt=std(net_chatgpt)
(240 missing values generated)

. egen std_net_seed=std(net_seed)

. 
. egen std_net_avg=rowmean(std_net_chatgpt std_net_seed)

. 
. xtile pct_chatgpt=net_chatgpt, n(100)

. xtile pct_seed=net_seed, n(100)

. xtile pct_avg=std_net_avg, n(100)

. 
. save "Intermediate/final_df.dta", replace
file Intermediate/final_df.dta saved

. 
. 
. ********************************************************************************
. * Section 2: Base Sample Construction
. ********************************************************************************
. 
. * ── Merge Compustat-CRSP with Glassdoor and OC ──────────────────────────────
. 
. use "Raw/compustat.dta", clear

. drop if missing(fyear)
(1 observation deleted)

. duplicates report LPERMNO fyear

Duplicates in terms of LPERMNO fyear

--------------------------------------
   Copies | Observations       Surplus
----------+---------------------------
        1 |        71473             0
        2 |           10             5
--------------------------------------

. duplicates tag LPERMNO fyear, gen(tag)

Duplicates in terms of LPERMNO fyear

. drop if tag>0
(10 observations deleted)

. drop tag

. rename LPERMNO permno

. rename fyear year

. merge 1:1 permno year using "Intermediate/final_df.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        56,216
        from master                    55,473  (_merge==1)
        from using                        743  (_merge==2)

    Matched                            16,000  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        56,216
>         from master                    55,473  (_merge==1)
>         from using                        743  (_merge==2)
> 
>     Matched                            16,000  (_merge==3)
>     -----------------------------------------
> */
. keep if _m==3
(56,216 observations deleted)

. drop _m

. duplicates report GVKEY year

Duplicates in terms of GVKEY year

--------------------------------------
   Copies | Observations       Surplus
----------+---------------------------
        1 |        16000             0
--------------------------------------

. destring GVKEY, gen(gvkey)
GVKEY: all characters numeric; gvkey generated as long

. rename year fyear

. merge 1:1 gvkey fyear using  "Intermediate/OC_compustat.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       280,007
        from master                        26  (_merge==1)
        from using                    279,981  (_merge==2)

    Matched                            15,974  (_merge==3)
    -----------------------------------------

. /*
> 
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                       280,007
>         from master                        26  (_merge==1)
>         from using                    279,981  (_merge==2)
> 
>     Matched                            15,974  (_merge==3)
>     -----------------------------------------
> 
> */
. drop if _m==2
(279,981 observations deleted)

. drop _merge

. egen oc_w_std=std(oc_w)
(4,862 missing values generated)

. 
. save "Intermediate/temp.dta", replace
file Intermediate/temp.dta saved

. 
. * ── Merge CEO turnover ──────────────────────────────────────────────────────
. 
. use "Intermediate/temp.dta", clear

. rename fyear year

. merge 1:1 gvkey year using "Intermediate/new_ceo_firm_year_1995_2021.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        35,684
        from master                       344  (_merge==1)
        from using                     35,340  (_merge==2)

    Matched                            15,656  (_merge==3)
    -----------------------------------------

. 
. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        35,684
>         from master                       344  (_merge==1)
>         from using                     35,340  (_merge==2)
> 
>     Matched                            15,656  (_merge==3)
>     -----------------------------------------
> */
. keep if _m==3
(35,684 observations deleted)

. drop _m

. 
. save "Intermediate/temp2.dta", replace
file Intermediate/temp2.dta saved

. 
. * ── Merge Glassdoor ratings ─────────────────────────────────────────────────
. 
. import delimited "Raw/glassdoor_ratings.csv", clear
(encoding automatically selected: ISO-8859-1)
(11 vars, 16,743 obs)

. drop v1

. merge 1:1 permno year using "Intermediate/temp2.dta"
(variable permno was long, now double to accommodate using data's values)
(variable year was int, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                         1,087
        from master                     1,087  (_merge==1)
        from using                          0  (_merge==2)

    Matched                            15,656  (_merge==3)
    -----------------------------------------

. /*    Result                      Number of obs
>     -----------------------------------------
>     Not matched                         1,087
>         from master                     1,087  (_merge==1)
>         from using                          0  (_merge==2)
> 
>     Matched                            15,656  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==1
(1,087 observations deleted)

. drop _merge

. save "Intermediate/temp3.dta", replace
file Intermediate/temp3.dta saved

. 
. * ── Merge best companies list ───────────────────────────────────────────────
. 
. use "Intermediate/temp3.dta", clear

. destring cik, gen(CIK)
cik: all characters numeric; CIK generated as long
(36 missing values generated)

. merge m:1 CIK year using "Raw/2014-2020 best CIK companies to work in.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        15,855
        from master                    15,486  (_merge==1)
        from using                        369  (_merge==2)

    Matched                               170  (_merge==3)
    -----------------------------------------

. drop if _m==2
(369 observations deleted)

. drop _m

. replace bestcompany=0 if missing(bestcompany)
(15,486 real changes made)

. 
. rename companyid firmid

. 
. save "Intermediate/temp3.dta", replace
file Intermediate/temp3.dta saved

. 
. * ── Merge ESG ratings (Sustainalytics, KLD) and CRSP returns ────────────────
. 
. use "Intermediate/temp3.dta", clear

. gen cusip6=substr(cusip,1,6)

. merge 1:1 cusip6 year using "Raw/Sustainalytics to merge.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        23,553
        from master                     9,301  (_merge==1)
        from using                     14,252  (_merge==2)

    Matched                             6,355  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        23,553
>         from master                     9,301  (_merge==1)
>         from using                     14,252  (_merge==2)
> 
>     Matched                             6,355  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(14,252 observations deleted)

. drop _m

. merge 1:1 cusip6 year using "Raw/KLD.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        28,914
        from master                     5,164  (_merge==1)
        from using                     23,750  (_merge==2)

    Matched                            10,492  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        28,914
>         from master                     5,164  (_merge==1)
>         from using                     23,750  (_merge==2)
> 
>     Matched                            10,492  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(23,750 observations deleted)

. drop _m

. 
. merge 1:1 permno year using "Intermediate/crsp_annual.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       144,745
        from master                        16  (_merge==1)
        from using                    144,729  (_merge==2)

    Matched                            15,640  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                       144,745
>         from master                        16  (_merge==1)
>         from using                    144,729  (_merge==2)
> 
>     Matched                            15,640  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(144,729 observations deleted)

. drop _m

. 
. 
. gen sic2=substr(sic,1,2)

. destring sic2, replace
sic2: all characters numeric; replaced as byte

. egen sic2year=group(sic2 year)

. 
. gen roe=ni/ceq*100
(3 missing values generated)

. 
. save "Intermediate/temp4.dta", replace
file Intermediate/temp4.dta saved

. 
. 
. ********************************************************************************
. * Section 3: Merge Employment, Financial, and Alternative Measures
. ********************************************************************************
. 
. * ── Cost of goods sold and employment ────────────────────────────────────────
. 
. use "Raw/cogs_empl.dta", clear

. keep gvkey fyear emp cogs

. drop if missing(fyear)
(242 observations deleted)

. duplicates drop

Duplicates in terms of all variables

(10 observations deleted)

. duplicates report gvkey fyear

Duplicates in terms of gvkey fyear

--------------------------------------
   Copies | Observations       Surplus
----------+---------------------------
        1 |       147727             0
        2 |            2             1
--------------------------------------

. duplicates tag gvkey fyear, gen(tag)

Duplicates in terms of gvkey fyear

. drop if tag==1 & missing(emp)
(1 observation deleted)

. drop tag

. rename fyear year

. destring gvkey, replace
gvkey: all characters numeric; replaced as long

. save "Intermediate/cogs_empl_clean.dta", replace
file Intermediate/cogs_empl_clean.dta saved

. 
. 
. use "Intermediate/temp4.dta", clear

. merge 1:1 gvkey year using "Intermediate/cogs_empl_clean.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       132,072
        from master                         0  (_merge==1)
        from using                    132,072  (_merge==2)

    Matched                            15,656  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                       132,072
>         from master                         0  (_merge==1)
>         from using                    132,072  (_merge==2)
> 
>     Matched                            15,656  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(132,072 observations deleted)

. drop _m

. gen gross_margin=(sale-cogs)/sale
(5 missing values generated)

. gen labor_productivity=sale/emp
(55 missing values generated)

. replace gross_margin=gross_margin*100
(15,651 real changes made)

. winsor2 gross_margin labor_productivity

. gen ln_labor_productivity=ln(1+labor_productivity)
(57 missing values generated)

. save "Intermediate/temp5.dta", replace
file Intermediate/temp5.dta saved

. 
. * ── Financial variables and winsorization ────────────────────────────────────
. 
. use "Intermediate/temp5.dta", clear

. drop if missing(net_chatgpt)
(210 observations deleted)

. gen leverage=(dltt+dlc)/ceq
(58 missing values generated)

. gen mk2book=mkvalt/ceq
(741 missing values generated)

. winsor2 roe annual_return annual_returnx ret retx leverage mk2book sale_growth

. save "Intermediate/temp6.dta", replace
file Intermediate/temp6.dta saved

. 
. * ── Interview-based OC ──────────────────────────────────────────────────────
. 
. use "Intermediate/temp6.dta", clear

. merge 1:1 permno year using "Intermediate/final_df_interview.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                         4,510
        from master                     3,527  (_merge==1)
        from using                        983  (_merge==2)

    Matched                            11,919  (_merge==3)
    -----------------------------------------

. /*
> 
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                         4,510
>         from master                     3,527  (_merge==1)
>         from using                        983  (_merge==2)
> 
>     Matched                            11,919  (_merge==3)
>     -----------------------------------------
> 
> */
. drop if _m==2
(983 observations deleted)

. drop _m

. save "Intermediate/temp7.dta", replace
file Intermediate/temp7.dta saved

. 
. * ── R&D expenditures and patents ─────────────────────────────────────────────
. 
. use "Raw/Compustat All Raw.dta", clear

. keep GVKEY fyear xrd

. destring GVKEY, gen(gvkey)
GVKEY: all characters numeric; gvkey generated as long

. drop GVKEY

. duplicates drop

Duplicates in terms of all variables

(2,656 observations deleted)

. save "Intermediate/xrd.dta", replace
file Intermediate/xrd.dta saved

. 
. use "Intermediate/temp7.dta", clear

. gen fyear=year

. merge 1:1 gvkey fyear using "Raw/DISCERN_Panel_Data_1980_2015.dta"
(variable fyear was float, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                        69,737
        from master                    12,149  (_merge==1)
        from using                     57,588  (_merge==2)

    Matched                             3,297  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        69,737
>         from master                    12,149  (_merge==1)
>         from using                     57,588  (_merge==2)
> 
>     Matched                             3,297  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(57,588 observations deleted)

. drop _m

. 
. merge 1:1 gvkey fyear using "Intermediate/xrd.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       301,630
        from master                         0  (_merge==1)
        from using                    301,630  (_merge==2)

    Matched                            15,446  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                       301,630
>         from master                         0  (_merge==1)
>         from using                    301,630  (_merge==2)
> 
>     Matched                            15,446  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(301,630 observations deleted)

. drop _m

. 
. save "Intermediate/temp8.dta", replace
file Intermediate/temp8.dta saved

. 
. * ── Current employee measures ────────────────────────────────────────────────
. 
. import delimited "Raw/final_df_current_employee.csv", clear
(encoding automatically selected: ISO-8859-1)
(9 vars, 11,833 obs)

. 
. keep year permno pros_sim_avg cons_sim_avg pros_sim2chatgpt_avg cons_sim2chatgpt_avg

. gen net_seed=pros_sim_avg-cons_sim_avg

. gen net_chatgpt=pros_sim2chatgpt_avg-cons_sim2chatgpt_avg
(173 missing values generated)

. 
. 
. egen std_net_chatgpt=std(net_chatgpt)
(173 missing values generated)

. egen std_net_seed=std(net_seed)

. 
. egen std_net_avg=rowmean(std_net_chatgpt std_net_seed)

. 
. xtile pct_chatgpt=net_chatgpt, n(100)

. xtile pct_seed=net_seed, n(100)

. xtile pct_avg=std_net_avg, n(100)

. 
. drop pros_sim_avg pros_sim2chatgpt_avg cons_sim_avg cons_sim2chatgpt_avg

. 
. rename * current_*

. rename current_permno permno

. rename current_year year

. 
. drop if missing(current_std_net_chatgpt)
(173 observations deleted)

. drop if missing(current_std_net_seed)
(0 observations deleted)

. 
. save "Intermediate/final_df_current_employee.dta", replace
file Intermediate/final_df_current_employee.dta saved

. 
. 
. use "Intermediate/temp8.dta", clear

. merge 1:1 permno year using "Intermediate/final_df_current_employee.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                         4,924
        from master                     4,355  (_merge==1)
        from using                        569  (_merge==2)

    Matched                            11,091  (_merge==3)
    -----------------------------------------

. drop if _m==2
(569 observations deleted)

. drop _m

. save "Intermediate/temp9.dta", replace
file Intermediate/temp9.dta saved

. 
. * ── Website-based OC ─────────────────────────────────────────────────────────
. 
. import delimited "Raw/final_df_website.csv", clear
(encoding automatically selected: ISO-8859-1)
(5 vars, 38,909 obs)

. drop v1

. rename sim2chatgpt_avg website_chatgpt

. rename sim_avg website_seed

. egen std_website_chatgpt=std(website_chatgpt)
(196 missing values generated)

. egen std_website_seed=std(website_seed)

. egen std_website_avg=rowmean(std_website_chatgpt std_website_seed)

. xtile pct_website_chatgpt=std_website_chatgpt, n(100)

. xtile pct_website_seed=std_website_seed, n(100)

. xtile pct_website_avg=std_website_avg, n(100)

. save "Intermediate/final_df_website.dta", replace
file Intermediate/final_df_website.dta saved

. 
. use "Intermediate/temp9.dta", clear

. merge 1:1 gvkey year using "Intermediate/final_df_website.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        40,257
        from master                     8,397  (_merge==1)
        from using                     31,860  (_merge==2)

    Matched                             7,049  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        40,257
>         from master                     8,397  (_merge==1)
>         from using                     31,860  (_merge==2)
> 
>     Matched                             7,049  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(31,860 observations deleted)

. drop _merge

. save "Intermediate/temp10.dta", replace
file Intermediate/temp10.dta saved

. 
. * ── LinkedIn turnover and salary ─────────────────────────────────────────────
. 
. use "Raw/linkedin gvkey.dta", clear

. collapse (sum)totalinflow=inflow totalcount=count totaloutflow=outflow totalsalary=salarycount, by(gvkey year)

. gen turnover=totaloutflow/totalcount*100
(70 missing values generated)

. gen avgsalary=totalsalary/totalcount
(70 missing values generated)

. save "Intermediate/avgsalary_turnover.dta", replace
file Intermediate/avgsalary_turnover.dta saved

. 
. 
. use "Intermediate/temp10.dta", clear

. merge 1:1 gvkey year using "Intermediate/avgsalary_turnover.dta"
(variable gvkey was long, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                        19,928
        from master                       796  (_merge==1)
        from using                     19,132  (_merge==2)

    Matched                            14,650  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        19,928
>         from master                       796  (_merge==1)
>         from using                     19,132  (_merge==2)
> 
>     Matched                            14,650  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(19,132 observations deleted)

. drop _m

. 
. save "Intermediate/temp11.dta", replace
file Intermediate/temp11.dta saved

. 
. 
. ********************************************************************************
. * Section 4: Lawsuits, Restatements, and Variable Labels
. ********************************************************************************
. 
. * ── Class-action lawsuits ────────────────────────────────────────────────────
. 
. import delimited "Raw/alldata_class_action_lawsuits.csv", varnames(1) clear
(encoding automatically selected: ISO-8859-1)
(5 vars, 6,505 obs)

. gen year=substr(filingdate,-4,4)

. destring year, replace
year: all characters numeric; replaced as int

. gen lawsuit=1

. collapse (sum)lawsuit, by(ticker year)

. drop if missing(ticker)
(28 observations deleted)

. save "Intermediate/lawsuit.dta", replace
file Intermediate/lawsuit.dta saved

. 
. * ── ISS restatements ─────────────────────────────────────────────────────────
. 
. import delimited "Raw/ISS_restate.csv", varnames(1) clear
(encoding automatically selected: ISO-8859-1)
(8 vars, 22,285 obs)

. rename fyear year

. keep gvkey year restate

. drop if missing(gvkey)
(996 observations deleted)

. drop if missing(year)
(0 observations deleted)

. drop if missing(restate)
(0 observations deleted)

. duplicates drop

Duplicates in terms of all variables

(0 observations are duplicates)

. duplicates report gvkey year

Duplicates in terms of gvkey year

--------------------------------------
   Copies | Observations       Surplus
----------+---------------------------
        1 |        21289             0
--------------------------------------

. save "Intermediate/restate.dta", replace
file Intermediate/restate.dta saved

. 
. * ── Merge lawsuits and restatements, generate scaled OC ──────────────────────
. 
. use "Intermediate/temp11.dta", clear

. gen oc_scaled=oc/at*100
(4,594 missing values generated)

. egen oc_scaled_std=std(oc_scaled)
(4,594 missing values generated)

. 
. bysort sic2 year: egen industry_avg_oc_scaled=mean(oc_scaled)
(70 missing values generated)

. gen demeaned_oc_scaled=oc_scaled-industry_avg_oc_scaled
(4,594 missing values generated)

. egen demeaned_oc_scaled_std=std(demeaned_oc_scaled)
(4,594 missing values generated)

. 
. merge 1:1 gvkey year using "Intermediate/restate.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        17,737
        from master                     5,947  (_merge==1)
        from using                     11,790  (_merge==2)

    Matched                             9,499  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        17,737
>         from master                     5,947  (_merge==1)
>         from using                     11,790  (_merge==2)
> 
>     Matched                             9,499  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(11,790 observations deleted)

. drop _m

. replace restate=0 if missing(restate)
(5,947 real changes made)

. 
. drop ticker

. rename tic ticker

. merge 1:1 ticker year using "Intermediate/lawsuit.dta"
(variable ticker was str8, now str10 to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                        20,557
        from master                    14,972  (_merge==1)
        from using                      5,585  (_merge==2)

    Matched                               474  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        20,557
>         from master                    14,972  (_merge==1)
>         from using                      5,585  (_merge==2)
> 
>     Matched                               474  (_merge==3)
>     -----------------------------------------
> 
> */
. drop if _m==2
(5,585 observations deleted)

. drop _m

. replace lawsuit=0 if missing(lawsuit)
(14,972 real changes made)

. gen lawsuitD=(lawsuit>0)

. 
. save "Intermediate/temp13.dta", replace
file Intermediate/temp13.dta saved

. 
. * ── Variable labels ──────────────────────────────────────────────────────────
. 
. use "Intermediate/temp13.dta", clear

. gen ROA=ni/at*100
(1 missing value generated)

. winsor2 ROA

. label var std_net_chatgpt "Org Capital (ChatGPT)"

. label var std_net_seed "Org Capital (Seed Word)"

. label var std_net_avg "Org Capital (Average)"

. label var pct_chatgpt "Pct Org Capital (ChatGPT)"

. label var pct_seed "Pct Org Capital (Seed Word)"

. label var pct_avg "Pct Org Capital (Average)"

. label var demeaned_oc_scaled_std "Org Capital (SG&A)"

. label var overallrating "Overall Rating"

. label var balance "Balance Rating"

. label var culture "Culture Rating"

. label var opportunities "Opportunities Rating"

. label var compensation "Compensation Rating"

. label var management "Management Rating"

. label var sustain_s "Social Score"

. label var sustain_g "Governance Score"

. label var sustain_e "Environmental Score"

. label var sustain_esg "ESG Score"

. label var DIV_str_num "# Diversity Strengths"

. label var DIV_con_num "# Diversity Concerns"

. label var EMP_str_num "# Employee Relation Strengths"

. label var EMP_con_num "# Employee Relation Concerns"

. label var ROA_w "ROA"

. label var roe_w "ROE"

. label var mk2book_w "P/B Ratio"

. label var lnat "Ln(Total Assets)"

. label var mm "Firm Age"

. label var sale_growth_w "Sales Growth"

. label var leverage_w "Leverage"

. label var turnover "Employee Turnover"

. label var avgsalary "Average Employee Compensation"

. 
. label var std_int_chatgpt "Interview Org Capital (ChatGPT)"

. label var std_int_seed "Interview Org Capital (Seed Word)"

. label var std_int_avg "Interview Org Capital (Average)"

. label var pct_int_chatgpt "Interview Pct Org Capital (ChatGPT)"

. label var pct_int_seed "Interview Pct Org Capital (Seed Word)"

. label var pct_int_avg "Interview Pct Org Capital (Average)"

. 
. save "Intermediate/temp14.dta", replace
file Intermediate/temp14.dta saved

. 
. 
. use "Intermediate/temp14.dta", clear

. label var std_net_chatgpt "\parbox{2cm}{\centering Org Capital (ChatGPT)}"

. label var std_net_seed "\parbox{2cm}{\centering Org Capital (Seed Word)}"

. label var std_net_avg "\parbox{2cm}{\centering Org Capital (Average)}"

. label var pct_chatgpt "\parbox{2cm}{\centering Pct Org Capital (ChatGPT)}"

. label var pct_seed "\parbox{2cm}{\centering Pct Org Capital (Seed Word)}"

. label var pct_avg "\parbox{2cm}{\centering Pct Org Capital (Average)}"

. label var demeaned_oc_scaled_std "\parbox{2cm}{\centering Org Capital (SG\&A)}"

. label var overallrating "\parbox{2cm}{\centering Overall Rating}"

. label var balance "\parbox{2cm}{\centering Balance Rating}"

. label var culture "\parbox{2cm}{\centering Culture Rating}"

. label var opportunities "\parbox{2cm}{\centering Opportunities Rating}"

. label var compensation "\parbox{2cm}{\centering Compensation Rating}"

. label var management "\parbox{2cm}{\centering Management Rating}"

. label var sustain_s "\parbox{2cm}{\centering Social Score}"

. label var sustain_g "\parbox{2cm}{\centering Governance Score}"

. label var sustain_e "\parbox{2cm}{\centering Environmental Score}"

. label var sustain_esg "\parbox{2cm}{\centering ESG Score}"

. label var DIV_str_num "\parbox{2cm}{\centering \# Diversity Strengths}"

. label var DIV_con_num "\parbox{2cm}{\centering \# Diversity Concerns}"

. label var EMP_str_num "\parbox{2cm}{\centering \# Employee Relation Strengths}"

. label var EMP_con_num "\parbox{2cm}{\centering \# Employee Relation Concerns}"

. label var ROA_w "\parbox{2cm}{\centering ROA}"

. label var roe_w "\parbox{2cm}{\centering ROE}"

. label var mk2book_w "\parbox{2cm}{\centering P/B Ratio}"

. label var lnat "\parbox{2cm}{\centering Ln(Total Assets)}"

. label var mm "\parbox{2cm}{\centering Firm Age}"

. label var sale_growth_w "\parbox{2cm}{\centering Sales Growth}"

. label var leverage_w "\parbox{2cm}{\centering Leverage}"

. label var turnover "\parbox{2cm}{\centering Employee Turnover}"

. label var avgsalary "\parbox{2cm}{\centering Average Employee Compensation}"

. label var std_int_chatgpt "\parbox{2cm}{\centering Interview Org Capital (ChatGPT)}"

. label var std_int_seed "\parbox{2cm}{\centering Interview Org Capital (Seed Word)}"

. label var std_int_avg "\parbox{2cm}{\centering Interview Org Capital (Average)}"

. label var pct_int_chatgpt "\parbox{2cm}{\centering Interview Pct Org Capital (ChatGPT)}"

. label var pct_int_seed "\parbox{2cm}{\centering Interview Pct Org Capital (Seed Word)}"

. label var pct_int_avg "\parbox{2cm}{\centering Interview Pct Org Capital (Average)}"

. save "Intermediate/temp15.dta", replace
file Intermediate/temp15.dta saved

. 
. 
. ********************************************************************************
. * Section 5: Merge LinkUp, Solow Residual, Violations, and HHI
. ********************************************************************************
. 
. * ── LinkUp job posting data ──────────────────────────────────────────────────
. 
. * Load the LinkUp similarity scores (sim, sim2chatgpt), rename them to linkup_*,
. * attach the temp15 panel by gvkey-year, then build z-scores and percentile ranks.
. import delimited "Raw/final_df_linkup.csv", clear
(encoding automatically selected: ISO-8859-1)
(5 vars, 8,176 obs)

. drop v1

. rename sim linkup_seed

. rename sim2chatgpt linkup_chatgpt

. * LinkUp is the master here. All 8,176 LinkUp rows match; the 7,270 panel rows
. * without LinkUp data are kept (no drop follows) with missing linkup_* values.
. merge 1:1 gvkey year using "Intermediate/temp15.dta"
(variable year was int, now double to accommodate using data's values)
(variable gvkey was long, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                         7,270
        from master                         0  (_merge==1)
        from using                      7,270  (_merge==2)

    Matched                             8,176  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                         7,270
>         from master                         0  (_merge==1)
>         from using                      7,270  (_merge==2)
> 
>     Matched                             8,176  (_merge==3)
>     -----------------------------------------
> */
. drop _m

. 
. * z-scores are computed over the pooled sample (no by-group).
. egen std_linkup_seed=std(linkup_seed)
(7,270 missing values generated)

. egen std_linkup_chatgpt=std(linkup_chatgpt)
(7,270 missing values generated)

. * rowmean() averages over whichever arguments are non-missing; in this run both
. * inputs are missing for the same 7,270 rows, so the average never uses one score only.
. egen std_linkup_avg=rowmean(std_linkup_seed std_linkup_chatgpt)
(7,270 missing values generated)

. * Percentile rank (1-100) of each standardised score.
. xtile pct_linkup_seed=std_linkup_seed, n(100)

. xtile pct_linkup_chatgpt=std_linkup_chatgpt, n(100)

. xtile pct_linkup_avg=std_linkup_avg, n(100)

. 
. save "Intermediate/temp16.dta", replace
file Intermediate/temp16.dta saved

. 
. * ── Solow model residual ────────────────────────────────────────────────────
. 
. * Build the firm-year inputs for the Solow-type regression from raw Compustat:
. * log sales, log gross PP&E (ppegt), log employees and the Fama-French 17 industry.
. use "Raw/Compustat All Raw.dta", clear

. destring sic, replace
sic: all characters numeric; replaced as int
(2 missing values generated)

. * sicff is not a built-in Stata command (user-written; it must be installed before this runs).
. sicff sic, industry(17) generate(ff17)

. tab ff17

       ff17 |      Freq.     Percent        Cum.
------------+-----------------------------------
   (1) Food |      9,522        3.04        3.04
  (2) Mines |      5,921        1.89        4.93
    (3) Oil |     14,053        4.49        9.42
  (4) Clths |      6,473        2.07       11.49
  (5) Durbl |      8,359        2.67       14.16
  (6) Chems |      5,329        1.70       15.86
  (7) Cnsum |     10,379        3.31       19.17
  (8) Cnstr |     11,597        3.70       22.88
  (9) Steel |      4,632        1.48       24.35
 (10) FabPr |      2,717        0.87       25.22
 (11) Machn |     35,671       11.39       36.61
  (12) Cars |      4,473        1.43       38.04
 (13) Trans |     11,430        3.65       41.69
 (14) Utils |     10,449        3.34       45.03
 (15) Rtail |     16,468        5.26       50.29
 (16) Finan |     71,680       22.89       73.18
 (17) Other |     83,987       26.82      100.00
------------+-----------------------------------
      Total |    313,140      100.00

. * ln() is missing for zero or negative inputs as well as for missing ones;
. * all such firm-years are removed by the drop loop below.
. gen lnsale=ln(sale)
(27,619 missing values generated)

. gen lnppe=ln(ppegt)
(58,922 missing values generated)

. gen lnemp=ln(emp)
(45,050 missing values generated)

. keep lnsale lnppe lnemp ff17 fyear GVKEY

. destring GVKEY, gen(gvkey)
GVKEY: all characters numeric; gvkey generated as long

. * Keep only complete cases on every regression input and key.
. foreach var in lnsale lnppe lnemp ff17 fyear GVKEY{
  2.         drop if missing(`var')
  3. }
(27,619 observations deleted)
(35,613 observations deleted)
(13,199 observations deleted)
(4,794 observations deleted)
(0 observations deleted)
(0 observations deleted)

. * Removes rows identical on all variables only; it does not by itself enforce
. * uniqueness on gvkey fyear, which the later 1:1 merge on gvkey fyear requires.
. duplicates drop

Duplicates in terms of all variables

(2,266 observations deleted)

. drop GVKEY

. save "Intermediate/solow_model_residual.dta", replace
file Intermediate/solow_model_residual.dta saved

. 
. * Regress lnsale on lnppe and lnemp separately within each FF17 industry x fiscal
. * year cell (17 industries, 2008-2020) and store the OLS residual of each cell in
. * solow_residual. The reload of the file just saved is redundant but harmless.
. use "Intermediate/solow_model_residual.dta", clear

. gen solow_residual=.
(236,241 missing values generated)

. * NOTE(review): every iteration creates a separate helper variable
. * solow_residual_`ff'_`yr' (17 x 13 = 221 variables) that is only discarded by the
. * later keep; a tempvar or a drop inside the loop would avoid the clutter.
. * NOTE(review): regress stops the do-file if a cell has no observations; no cell
. * was empty in this run, but the loop has no guard for it.
. forvalues ff=1/17 {
  2.         forvalues yr=2008/2020 {
  3.                 qui reg lnsale lnppe lnemp if ff17 == `ff' & fyear == `yr'
  4.                 predict solow_residual_`ff'_`yr', residual
  5.                 replace solow_residual=solow_residual_`ff'_`yr' if ff17 == `ff' & fyear == `yr'
  6.         }
  7. }
(122 real changes made)
(128 real changes made)
(123 real changes made)
(118 real changes made)
(120 real changes made)
(115 real changes made)
(114 real changes made)
(112 real changes made)
(111 real changes made)
(106 real changes made)
(100 real changes made)
(98 real changes made)
(104 real changes made)
(82 real changes made)
(82 real changes made)
(88 real changes made)
(90 real changes made)
(99 real changes made)
(102 real changes made)
(104 real changes made)
(93 real changes made)
(89 real changes made)
(94 real changes made)
(90 real changes made)
(87 real changes made)
(89 real changes made)
(226 real changes made)
(221 real changes made)
(212 real changes made)
(215 real changes made)
(212 real changes made)
(220 real changes made)
(228 real changes made)
(212 real changes made)
(198 real changes made)
(197 real changes made)
(189 real changes made)
(173 real changes made)
(149 real changes made)
(59 real changes made)
(58 real changes made)
(58 real changes made)
(58 real changes made)
(54 real changes made)
(51 real changes made)
(46 real changes made)
(44 real changes made)
(43 real changes made)
(41 real changes made)
(39 real changes made)
(41 real changes made)
(41 real changes made)
(77 real changes made)
(68 real changes made)
(67 real changes made)
(65 real changes made)
(60 real changes made)
(58 real changes made)
(60 real changes made)
(59 real changes made)
(58 real changes made)
(60 real changes made)
(65 real changes made)
(65 real changes made)
(68 real changes made)
(84 real changes made)
(85 real changes made)
(82 real changes made)
(80 real changes made)
(78 real changes made)
(80 real changes made)
(80 real changes made)
(83 real changes made)
(85 real changes made)
(85 real changes made)
(79 real changes made)
(75 real changes made)
(77 real changes made)
(205 real changes made)
(193 real changes made)
(180 real changes made)
(167 real changes made)
(152 real changes made)
(152 real changes made)
(156 real changes made)
(155 real changes made)
(155 real changes made)
(148 real changes made)
(161 real changes made)
(160 real changes made)
(167 real changes made)
(120 real changes made)
(111 real changes made)
(112 real changes made)
(116 real changes made)
(115 real changes made)
(128 real changes made)
(129 real changes made)
(128 real changes made)
(126 real changes made)
(125 real changes made)
(127 real changes made)
(124 real changes made)
(124 real changes made)
(46 real changes made)
(45 real changes made)
(48 real changes made)
(50 real changes made)
(50 real changes made)
(52 real changes made)
(50 real changes made)
(45 real changes made)
(44 real changes made)
(44 real changes made)
(44 real changes made)
(41 real changes made)
(41 real changes made)
(28 real changes made)
(30 real changes made)
(29 real changes made)
(29 real changes made)
(28 real changes made)
(27 real changes made)
(28 real changes made)
(27 real changes made)
(26 real changes made)
(27 real changes made)
(25 real changes made)
(24 real changes made)
(25 real changes made)
(581 real changes made)
(560 real changes made)
(552 real changes made)
(540 real changes made)
(526 real changes made)
(512 real changes made)
(495 real changes made)
(469 real changes made)
(443 real changes made)
(435 real changes made)
(427 real changes made)
(406 real changes made)
(409 real changes made)
(56 real changes made)
(52 real changes made)
(55 real changes made)
(58 real changes made)
(59 real changes made)
(59 real changes made)
(60 real changes made)
(60 real changes made)
(59 real changes made)
(59 real changes made)
(62 real changes made)
(61 real changes made)
(64 real changes made)
(179 real changes made)
(176 real changes made)
(175 real changes made)
(163 real changes made)
(162 real changes made)
(165 real changes made)
(171 real changes made)
(169 real changes made)
(168 real changes made)
(166 real changes made)
(169 real changes made)
(165 real changes made)
(167 real changes made)
(127 real changes made)
(126 real changes made)
(122 real changes made)
(117 real changes made)
(112 real changes made)
(110 real changes made)
(115 real changes made)
(114 real changes made)
(110 real changes made)
(110 real changes made)
(106 real changes made)
(102 real changes made)
(97 real changes made)
(211 real changes made)
(208 real changes made)
(204 real changes made)
(199 real changes made)
(201 real changes made)
(203 real changes made)
(200 real changes made)
(201 real changes made)
(189 real changes made)
(184 real changes made)
(184 real changes made)
(174 real changes made)
(174 real changes made)
(319 real changes made)
(317 real changes made)
(324 real changes made)
(322 real changes made)
(311 real changes made)
(316 real changes made)
(332 real changes made)
(335 real changes made)
(317 real changes made)
(316 real changes made)
(312 real changes made)
(314 real changes made)
(326 real changes made)
(1,498 real changes made)
(1,413 real changes made)
(1,376 real changes made)
(1,341 real changes made)
(1,307 real changes made)
(1,344 real changes made)
(1,426 real changes made)
(1,442 real changes made)
(1,412 real changes made)
(1,398 real changes made)
(1,429 real changes made)
(1,451 real changes made)
(1,522 real changes made)

. * Sanity check: the mean is ~0 (1.49e-10), as expected for OLS residuals from
. * regressions with a constant; the distribution is heavy-tailed (kurtosis 10.8).
. su solow_residual, detail

                       solow_residual
-------------------------------------------------------------
      Percentiles      Smallest
 1%    -3.102121      -9.642337
 5%    -1.408198      -9.219996
10%    -.9530778      -8.197727       Obs              48,351
25%     -.403627       -8.09217       Sum of wgt.      48,351

50%     .0390849                      Mean           1.49e-10
                        Largest       Std. dev.      .9332177
75%     .4726444       6.125216
90%     .9789885       6.228163       Variance       .8708953
95%      1.37374       7.000781       Skewness      -1.069515
99%     2.211804       8.073595       Kurtosis       10.84168

. * Residuals exist only for 2008-2020; the 48,351 rows that remain are exactly
. * the rows with a non-missing residual (236,241 - 187,890).
. keep if fyear>=2008 & fyear<=2020
(187,890 observations deleted)

. * This keep also discards the 221 per-cell helper variables created by the loop.
. keep solow_residual gvkey fyear ff17

. save "Intermediate/solow_model_residual_2008_2020.dta", replace
file Intermediate/solow_model_residual_2008_2020.dta saved

. 
. * ── ViolationTracker data ────────────────────────────────────────────────────
. 
. * Build cik-fyear violation measures from ViolationTracker: total penalty amount,
. * number of violations, and one indicator per offense group.
. import excel "Raw/smallerViolationTracker_basic_20dec20.xlsx", sheet("Sheet1") firstrow clear
(8 vars, 444,685 obs)

. keep parent_name pen_year offense_group penalty

. * Only records that name a parent company are kept (95,050 of 444,685).
. drop if parent_name==""
(349,635 observations deleted)

. rename penalty penaltyamount

. * NOTE(review): the penalty year is used as fyear when merging with the panel;
. * confirm that treating it as the firm's fiscal year is intended.
. rename pen_year fyear

. gen violation=1

. save "Intermediate/violation raw.dta", replace
file Intermediate/violation raw.dta saved

. 
. 
. 
. * Parent lookup table: supplies cik (and stock_ticker) for each parent_name.
. import excel "Raw/ViolationTracker_parents_20dec20.xlsx", sheet("Sheet1") firstrow clear
(16 vars, 3,341 obs)

. keep parent_name cik stock_ticker

. 
. save "Intermediate/violation parent_name.dta", replace
file Intermediate/violation parent_name.dta saved

. 
. 
. 
. use "Intermediate/violation raw.dta", clear

. 
. * The 30 unmatched violation rows are kept and carry a missing cik.
. merge m:1 parent_name using "Intermediate/violation parent_name.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                            30
        from master                        30  (_merge==1)
        from using                          0  (_merge==2)

    Matched                            95,020  (_merge==3)
    -----------------------------------------

. drop _merge

. save, replace
file Intermediate/violation raw.dta saved

. 
. 
. * Reloading the file that was just saved is redundant but harmless.
. use "Intermediate/violation raw.dta", clear

. * One 0/1 indicator per offense_group value.
. gen safety_violation=(offense_group=="safety-related offenses")

. gen miscellaneous_violation=(offense_group=="miscellaneous offenses")

. gen healthcare_violation=(offense_group=="healthcare-related offenses")

. gen government_violation=(offense_group=="government-contracting-related offenses")

. gen financial_violation=(offense_group=="financial offenses")

. gen environment_violation=(offense_group=="environment-related offenses")

. gen employment_violation=(offense_group=="employment-related offenses")

. gen consumer_violation=(offense_group=="consumer-protection-related offenses")

. gen competition_violation=(offense_group=="competition-related offenses")

. * Collapse to one row per cik-fyear: penalties and the violation count are summed,
. * the indicators are reduced with max (1 if any record in the cell has the offense).
. * stock_ticker is not carried through the collapse.
. * NOTE(review): rows with a missing cik (the 30 unmatched rows, plus any parents
. * whose cik is blank in the lookup table) are pooled into cik==. groups here. merge
. * treats missing key values like any other value, so these groups can match panel
. * rows whose cik is missing in the later merge on cik fyear. Consider
. * "drop if missing(cik)" before the collapse.
. collapse (sum)penaltyamount violationnum=violation (max)safety_violation miscellaneous_violation healthcare_violation government_violation financial_violation environment_violation employment_violation consumer_violation competition_violation violation,
>  by(cik fyear)

. 
. save "Intermediate/violation.dta", replace
file Intermediate/violation.dta saved

. 
. * ── Restatement data ─────────────────────────────────────────────────────────
. 
. * Build a cik-year indicator equal to 1 in the year a restatement was filed.
. use "Raw/raw_restatement.dta", clear

. rename COMPANY_FKEY cik

. destring cik, replace
cik: all characters numeric; replaced as long

. * NOTE(review): TICKER, restatebeginyear and restateendyear are created here but
. * discarded by the keep below; only the filing year is used.
. rename BEST_EDGAR_TICKER TICKER

. gen restatebeginyear=year(RES_BEGIN_DATE)

. gen restateendyear=year(RES_END_DATE)

. gen restatefileyear=year(FILE_DATE)

. drop RES_END_DATE RES_BEGIN_DATE FILE_DATE

. keep cik restatefileyear

. 
. 
. * NOTE(review): the calendar year of FILE_DATE is used as fyear for merging;
. * confirm that matching it to the panel's fiscal year is intended.
. rename restatefileyear fyear

. gen restate=1

. label variable restate "1 for the announcement year"

. * Only cik, fyear and restate (always 1) remain, so rows duplicated on cik fyear
. * are identical on every variable and the force option discards no information.
. duplicates drop cik fyear, force

Duplicates in terms of cik fyear

(1,597 observations deleted)

. 
. save "Intermediate/restatement.dta", replace
file Intermediate/restatement.dta saved

. 
. * ── HHI (industry competitiveness) ──────────────────────────────────────────
. 
. * Herfindahl index per 2-digit SIC industry and fiscal year. Shares are based on
. * market value (mkvalt), not sales. competitive = 1 when the industry's HHI is
. * below the median HHI across industries in that fiscal year.
. use "Raw/Compustat All Raw.dta", clear

. keep GVKEY fyear mkvalt sic

. duplicates drop

Duplicates in terms of all variables

(2,656 observations deleted)

. * NOTE(review): gvkey is generated but never used before the collapse below.
. destring GVKEY, gen(gvkey)
GVKEY: all characters numeric; gvkey generated as long

. * sic is a string at this point; the first two characters are taken as the 2-digit code.
. * NOTE(review): this assumes 4-character codes with leading zeros (e.g. "0100");
. * a code stored as "100" would be assigned to industry 10 -- verify the raw format.
. gen sic2=substr(sic,1,2)
(2 missing values generated)

. destring sic2, replace
sic2: all characters numeric; replaced as byte
(2 missing values generated)

. drop if missing(sic2)
(2 observations deleted)

. drop if missing(mkvalt)
(196,935 observations deleted)

. * egen's sum() is the older name of total(); the documented function is total().
. bysort sic2 fyear: egen sum_mkt=sum(mkvalt)

. gen share_sq=(mkvalt/sum_mkt)^2

. collapse (sum)HHI=share_sq, by(sic2 fyear)

. bysort fyear: egen temp=median(HHI)

. gen competitive=(HHI<temp)

. drop temp

. save "Intermediate/HHI.dta", replace
file Intermediate/HHI.dta saved

. 
. 
. 
. * Same market-value-based Herfindahl index as the sic2 version, computed per
. * Fama-French 17 industry and fiscal year; outputs are renamed with an _ff17 suffix.
. use "Raw/Compustat All Raw.dta", clear

. destring sic, replace
sic: all characters numeric; replaced as int
(2 missing values generated)

. * sicff is not a built-in Stata command (user-written; it must be installed before this runs).
. sicff sic, industry(17) generate(ff17)

. keep GVKEY fyear mkvalt ff17

. * NOTE(review): gvkey is generated but never used before the collapse below.
. destring GVKEY, gen(gvkey)
GVKEY: all characters numeric; gvkey generated as long

. drop if missing(ff17)
(6,592 observations deleted)

. drop if missing(mkvalt)
(194,604 observations deleted)

. duplicates drop

Duplicates in terms of all variables

(1,238 observations deleted)

. bysort ff17 fyear: egen sum_mkt=sum(mkvalt)

. gen share_sq=(mkvalt/sum_mkt)^2

. collapse (sum)HHI=share_sq, by(ff17 fyear)

. * competitive = 1 when HHI is below the cross-industry median of the same fiscal year.
. bysort fyear: egen temp=median(HHI)

. gen competitive=(HHI<temp)

. drop temp

. rename HHI HHI_ff17

. rename competitive competitive_ff17

. save "Intermediate/HHI_ff17.dta", replace
file Intermediate/HHI_ff17.dta saved

. 
. * ── Assemble temp17 ─────────────────────────────────────────────────────────
. 
. * Start from the temp16 panel, add the FF17 industry code, and attach the Solow
. * residual by gvkey-fyear. Residual rows without a panel match are discarded.
. use "Intermediate/temp16.dta", clear

. 
. destring sic, replace
sic: all characters numeric; replaced as int

. sicff sic, industry(17) generate(ff17)

. * ff17 exists in both files; without update/replace options merge keeps the
. * master (panel) value. 2,094 panel rows end up without a Solow residual.
. merge 1:1 gvkey fyear using "Intermediate/solow_model_residual_2008_2020.dta"
(label lbl_ff_17 already defined)

    Result                      Number of obs
    -----------------------------------------
    Not matched                        37,093
        from master                     2,094  (_merge==1)
        from using                     34,999  (_merge==2)

    Matched                            13,352  (_merge==3)
    -----------------------------------------

. /*
> 
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        37,093
>         from master                     2,094  (_merge==1)
>         from using                     34,999  (_merge==2)
> 
>     Matched                            13,352  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(34,999 observations deleted)

. drop _m

. 
. * Rebuild the restatement indicator: drop the existing restate variable and
. * re-attach it from restatement.dta by cik-fyear.
. drop restate

. * 35 panel rows have no cik after conversion.
. destring cik, replace
cik: all characters numeric; replaced as long
(35 missing values generated)

. * Confirms cik-fyear is unique among the 15,411 rows with a non-missing cik.
. duplicates report cik fyear if !missing(cik)

Duplicates in terms of cik fyear

--------------------------------------
   Copies | Observations       Surplus
----------+---------------------------
        1 |        15411             0
--------------------------------------

. merge m:1 cik fyear using "Intermediate/restatement.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        30,095
        from master                    14,414  (_merge==1)
        from using                     15,681  (_merge==2)

    Matched                             1,032  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        30,095
>         from master                    14,414  (_merge==1)
>         from using                     15,681  (_merge==2)
> 
>     Matched                             1,032  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(15,681 observations deleted)

. drop _merge

. * Every unmatched panel row is coded as "no restatement".
. * NOTE(review): this presumably includes the rows whose cik is missing, where
. * restatement status is unknown rather than zero -- confirm this is acceptable.
. replace restate=0 if restate==.
(14,414 real changes made)

. 
. * Attach the cik-fyear violation measures; panel rows without a match get zeros below.
. * NOTE(review): merge treats missing key values like any other value. If
. * violation.dta contains rows with a missing cik, they will match the panel rows
. * whose cik is missing and assign them pooled violations of unrelated parents.
. * Check "count if missing(cik)" in both files.
. merge m:1 cik fyear using  "Intermediate/violation.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        18,027
        from master                    10,196  (_merge==1)
        from using                      7,831  (_merge==2)

    Matched                             5,250  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        18,027
>         from master                    10,196  (_merge==1)
>         from using                      7,831  (_merge==2)
> 
>     Matched                             5,250  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(7,831 observations deleted)

. drop _merge

. * Firm-years absent from ViolationTracker are treated as having no violations:
. * every measure is set to 0 for the 10,196 unmatched panel rows.
. foreach var in penaltyamount violationnum safety_violation miscellaneous_violation healthcare_violation government_violation financial_violation environment_violation employment_violation consumer_violation competition_violation violation{
  2.         replace `var'=0 if missing(`var')
  3. }
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)
(10,196 real changes made)

. 
. * Convert execid to numeric and attach the GAI file by gvkey-fyear; GAI rows
. * without a panel match are discarded.
. destring execid, replace
execid: all characters numeric; replaced as long

. 
. * NOTE(review): the file name suggests GAI coverage ends in 2016 while the panel
. * runs to 2020, which would explain part of the 7,630 unmatched panel rows -- confirm.
. merge 1:1 gvkey fyear using "Raw/gai_1992_2016.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        35,387
        from master                     7,630  (_merge==1)
        from using                     27,757  (_merge==2)

    Matched                             7,816  (_merge==3)
    -----------------------------------------

. /*
> 
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        35,387
>         from master                     7,630  (_merge==1)
>         from using                     27,757  (_merge==2)
> 
>     Matched                             7,816  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(27,757 observations deleted)

. drop _m

. 
. * Attach the industry concentration measures: first by 2-digit SIC and fiscal
. * year (every panel row matches), then by FF17 industry and fiscal year.
. merge m:1 sic2 fyear using "Intermediate/HHI.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                           850
        from master                         0  (_merge==1)
        from using                        850  (_merge==2)

    Matched                            15,446  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                           850
>         from master                         0  (_merge==1)
>         from using                        850  (_merge==2)
> 
>     Matched                            15,446  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(850 observations deleted)

. drop _m

. 
. * 234 panel rows find no FF17 cell and keep missing HHI_ff17 / competitive_ff17.
. merge m:1 ff17 fyear using "Intermediate/HHI_ff17.dta"
(label lbl_ff_17 already defined)

    Result                      Number of obs
    -----------------------------------------
    Not matched                           421
        from master                       234  (_merge==1)
        from using                        187  (_merge==2)

    Matched                            15,212  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                           421
>         from master                       234  (_merge==1)
>         from using                        187  (_merge==2)
> 
>     Matched                            15,212  (_merge==3)
>     -----------------------------------------
> */
. drop if _m==2
(187 observations deleted)

. drop _m

. 
. * Asset turnover = sales / total assets.
. gen asset_turnover=sale/at
(1 missing value generated)

. * winsor2 is user-written; called without options it is expected to add a new
. * variable asset_turnover_w winsorised at the 1st/99th percentiles -- confirm the
. * defaults of the installed version.
. winsor2 asset_turnover

. save "Intermediate/temp17.dta", replace
file Intermediate/temp17.dta saved

. 
. 
. ********************************************************************************
. * Section 6: Final Dataset Construction
. ********************************************************************************
. * PAUSE: Run code/short_lived_CEOs.py and code/pay_ratio_cleaning.py before
. *        continuing past this point.
. 
. * Add an industry-by-year group id to the temp17 panel and save it as temp18.
. use "Intermediate/temp17.dta", clear

. * NOTE(review): groups are formed on sic2 x year, whereas the industry-level merges
. * in Section 5 key on fyear -- confirm that year (not fyear) is intended here.
. egen indyear=group(sic2 year)

. save "Intermediate/temp18.dta", replace
file Intermediate/temp18.dta saved

. 
. * ── Import short-lived CEOs ──────────────────────────────────────────────────
. 
. * Convert the CSV produced by the external Python step (see the PAUSE note at the
. * top of Section 6) to .dta; it is later merged 1:1 on gvkey year.
. import delimited "Intermediate/num_ceos_past_10_years.csv", clear
(encoding automatically selected: ISO-8859-1)
(3 vars, 59,017 obs)

. save "Intermediate/num_ceos_past_10_years.dta", replace
file Intermediate/num_ceos_past_10_years.dta saved

. 
. * ── Prepare orgcapital subset for prior-firm OC change ──────────────────────
. 
. * Extract the review-based organizational capital scores (z-scores and percentile
. * ranks) by gvkey-year; used below as the lookup table for CEOs' prior firms.
. use "Intermediate/temp17.dta", clear

. keep gvkey year std_net_avg std_net_chatgpt std_net_seed pct_avg pct_chatgpt pct_seed

. save "Intermediate/orgcapital.dta", replace
file Intermediate/orgcapital.dta saved

. 
. * ── Compute prior-firm organizational capital change ─────────────────────────
. 
. * Load CEO transitions and map each CEO's prior firm name to an identifier via
. * prior_firm_id.dta. The hiring firm's gvkey is renamed to newfirmgvkey so that
. * gvkey can later hold the prior firm's id.
. use "Raw/CEO_transition.dta", clear

. keep gvkey co_per_rol year prior_firm_1 prior_pos_1 prior_pos_startyr_1 becameceoyear

. drop if missing(prior_firm_1)
(581 observations deleted)

. rename gvkey newfirmgvkey

. * Only 652 of the 2,137 transitions with a prior firm name find an id; the
. * other 1,485 are dropped by the keep below.
. merge m:1 prior_firm_1 using "Raw/prior_firm_id.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                         1,494
        from master                     1,485  (_merge==1)
        from using                          9  (_merge==2)

    Matched                               652  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                         1,494
>         from master                     1,485  (_merge==1)
>         from using                          9  (_merge==2)
> 
>     Matched                               652  (_merge==3)
>     -----------------------------------------
> */
. keep if _m==3
(1,494 observations deleted)

. drop _m

. * Extract a 4-digit start year from the string prior_pos_startyr_1: when the last
. * "/" is at position 3 (looks like "mm/yyyy") the year is the last four characters,
. * otherwise the first four characters are used. destring below reports all
. * characters numeric, so no unparsed residue remained in this run.
. replace prior_pos_startyr_1=substr(prior_pos_startyr_1,1,4) if strrpos(prior_pos_startyr_1,"/")!=3
(221 real changes made)

. replace prior_pos_startyr_1=substr(prior_pos_startyr_1,-4,4) if strrpos(prior_pos_startyr_1,"/")==3
(170 real changes made)

. destring prior_pos_startyr_1, replace
prior_pos_startyr_1: all characters numeric; replaced as int
(14 missing values generated)

. 
. * diff = number of years from the prior-position start year through the year of
. * becoming CEO, both inclusive.
. gen diff=becameceoyear-prior_pos_startyr_1+1
(14 missing values generated)

. drop if missing(diff) | diff<=0
(16 observations deleted)

. * One row per year of the prior position.
. expand diff

. * (5,591 observations created)
. * The sort has no lasting effect: year is dropped next and bysort re-sorts the data.
. sort newfirmgvkey year

. drop year

. * NOTE(review): the year counter uses _n within co_per_rol, which assumes exactly
. * one source row per co_per_rol. With several source rows, _n would run across all
. * expanded copies and year would overshoot becameceoyear -- verify uniqueness.
. bysort co_per_rol: gen year=prior_pos_startyr_1+_n-1

. 
. * gvkey now refers to the prior firm.
. rename prior_gvkey gvkey

. * Look up the prior firm's organizational capital scores for each prior-position
. * year. Only matched firm-years are kept (1,055 of 6,227 expanded rows).
. merge m:1 gvkey year using "Intermediate/orgcapital.dta"
(variable gvkey was long, now double to accommodate using data's values)
(variable year was float, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                        19,849
        from master                     5,172  (_merge==1)
        from using                     14,677  (_merge==2)

    Matched                             1,055  (_merge==3)
    -----------------------------------------

. /*
>     Result                      Number of obs
>     -----------------------------------------
>     Not matched                        19,849
>         from master                     5,172  (_merge==1)
>         from using                     14,677  (_merge==2)
> 
>     Matched                             1,055  (_merge==3)
>     -----------------------------------------
> */
. keep if _m==3
(19,849 observations deleted)

. drop _m

. * For every score, take the value in the CEO's first and last matched prior-firm
. * year and compute chg_* from them. "First" and "last" are the earliest and latest
. * years that matched orgcapital.dta, not necessarily the start and end of the position.
. sort co_per_rol year

. * NOTE(review): chg_* is computed as first minus last, so a positive value means
. * the score fell over the period -- confirm this sign convention is intended
. * (a conventional change would be last minus first).
. * A CEO with a single matched year gets chg_* = 0.
. foreach var in std_net_avg std_net_chatgpt std_net_seed pct_avg pct_chatgpt pct_seed{
  2.         by co_per_rol: gen temp=`var' if _n==1
  3.         by co_per_rol: egen first_`var'=min(temp)
  4.         drop temp
  5.         by co_per_rol: gen temp=`var' if _n==_N
  6.         by co_per_rol: egen last_`var'=min(temp)
  7.         drop temp
  8.         gen chg_`var'=first_`var'-last_`var'
  9. }
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)
(874 missing values generated)

. keep co_per_rol chg_pct_avg chg_pct_chatgpt chg_pct_seed chg_std_net_avg chg_std_net_chatgpt chg_std_net_seed becameceoyear

. * The kept variables are constant within co_per_rol, so this leaves one row per
. * CEO (1,055 - 874 = 181 rows).
. duplicates drop

Duplicates in terms of all variables

(874 observations deleted)

. save "Intermediate/prior_firm_orgcapital_change.dta", replace
file Intermediate/prior_firm_orgcapital_change.dta saved

. 
. * ── CEO compensation ─────────────────────────────────────────────────────────
. 
. * Executive-year compensation: keep tdc1 per co_per_rol-year and add its log.
. use "Raw/annual_comp.dta", clear
((null))

. keep co_per_rol year tdc1

. duplicates drop

Duplicates in terms of all variables

(0 observations are duplicates)

. * ln() is missing when tdc1 is missing, zero or negative (478 rows).
. gen lntdc1=ln(tdc1)
(478 missing values generated)

. save "Intermediate/ceocomp.dta", replace
file Intermediate/ceocomp.dta saved

. 
. * ── Pay ratio ────────────────────────────────────────────────────────────────
. 
. * Pay ratio from the external Python cleaning step (see the PAUSE note at the top
. * of Section 6): flag firms below the sample median and add the log ratio.
. import delimited "Intermediate/pay_ratio_cleaned.csv", clear
(encoding automatically selected: UTF-8)
(2 vars, 2,189 obs)

. * The median is taken over all rows of this file, before merging with the panel.
. egen temp=median(pay_ratio_cleaned)

. gen low_pay_ratio=(pay_ratio_cleaned<temp)

. drop temp

. * No missing values are generated, so every ratio in the file is positive and non-missing.
. gen ln_pay_ratio=ln(pay_ratio_cleaned)

. save "Intermediate/pay_ratio_cleaned.dta", replace
file Intermediate/pay_ratio_cleaned.dta saved

. 
. * ── Final merges ─────────────────────────────────────────────────────────────
. 
. * Attach CEO counts, the E-index file and the pay ratio to the temp18 panel;
. * rows that exist only in a using file are dropped after each merge.
. * NOTE(review): the keys alternate between gvkey-year and gvkey-fyear across these
. * merges -- confirm that year and fyear are interchangeable in this panel.
. use "Intermediate/temp18.dta", clear

. 
. merge 1:1 gvkey year using "Intermediate/num_ceos_past_10_years.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        43,577
        from master                         3  (_merge==1)
        from using                     43,574  (_merge==2)

    Matched                            15,443  (_merge==3)
    -----------------------------------------

. drop if _m==2
(43,574 observations deleted)

. drop _m

. 
. merge 1:1 gvkey fyear using "Raw/Eindex rgvkey to merge.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                        10,106
        from master                     3,962  (_merge==1)
        from using                      6,144  (_merge==2)

    Matched                            11,484  (_merge==3)
    -----------------------------------------

. 
. drop if _m==2
(6,144 observations deleted)

. drop _m

. 
. * The pay ratio file has one row per ticker and no year, so the same value is
. * applied to every year of a firm.
. * NOTE(review): panel rows with a blank ticker would match a blank ticker in the
. * using file if one exists -- verify that neither side has empty tickers.
. merge m:1 ticker using "Intermediate/pay_ratio_cleaned.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                         4,732
        from master                     3,709  (_merge==1)
        from using                      1,023  (_merge==2)

    Matched                            11,737  (_merge==3)
    -----------------------------------------

. drop if _m==2
(1,023 observations deleted)

. drop _m

. 
. * Declare the gvkey-year panel, compute year-over-year percentage growth of the
. * net review scores, compute CEO tenure, and save temp19.
. sort gvkey year

. xtset gvkey year

Panel variable: gvkey (unbalanced)
 Time variable: year, 2008 to 2020, but with gaps
         Delta: 1 unit

. 
. * The net scores can be negative, so each is shifted by its sample minimum plus 1
. * (new minimum = 1) to keep the growth denominator strictly positive.
. * NOTE(review): the resulting growth rates depend on the shift constant and on the
. * sample minimum, i.e. on sample composition -- confirm this is acceptable.
. egen temp=min(net_seed)

. gen net_seed_shifted=net_seed-temp+1

. drop temp

. 
. egen temp=min(net_chatgpt)

. gen net_chatgpt_shifted=net_chatgpt-temp+1

. drop temp

. 
. * l. is the previous year's value within gvkey; growth is missing for a firm's
. * first year and after any gap in the panel.
. gen chatgpt_growth=(net_chatgpt_shifted-l.net_chatgpt_shifted)/l.net_chatgpt_shifted*100
(2,059 missing values generated)

. gen seed_growth=(net_seed_shifted-l.net_seed_shifted)/l.net_seed_shifted*100
(2,059 missing values generated)

. 
. * Tenure = panel year minus the year extracted from becameceo; negative values
. * (panel year before the CEO start year) are set to missing.
. gen tenureasceo=year-year(becameceo)
(136 missing values generated)

. replace tenureasceo=. if tenureasceo<0
(260 real changes made, 260 to missing)

. 
. save "Intermediate/temp19.dta", replace
file Intermediate/temp19.dta saved

. 
. 
. quietly log close
