Monday, February 22, 2016

Association between prior smoking and depression


In this analysis, I examine if the association between nicotine dependence and depression holds true also when there was a major smoking prior to the last 12 months reported.
From the post-hoc tests we see that the X2 value increases and the p-value shrinks when we compare infrequent smokers to frequent smokers (See tables in the Coding section). The significance is lesser than if we only compare the last 12 months, maybe because both are indicator for actual smoking. Future tests should investigate actual smoking and depression.



Appendix Code

%matplotlib inline
import pandas
import numpy
import scipy.stats
import seaborn
import matplotlib.pyplot as plt
In [33]:
data = pandas.read_csv('nesarc_pds.csv', low_memory=False)
In [41]:
data['TAB12MDX'] = data['TAB12MDX'].convert_objects(convert_numeric=True)
data['TABP12MDX'] = data['TABP12MDX'].convert_objects(convert_numeric=True)
data['CHECK321'] = data['CHECK321'].convert_objects(convert_numeric=True)
data['S3AQ3B1'] = data['S3AQ3B1'].convert_objects(convert_numeric=True)
data['S3AQ3C1'] = data['S3AQ3C1'].convert_objects(convert_numeric=True)
data['AGE'] = data['AGE'].convert_objects(convert_numeric=True)

#subset data to young adults age 18 to 25 who have smoked in the past 12 months
sub1=data[(data['AGE']>=18) & (data['AGE']<=25) & (data['CHECK321']==1)]

#make a copy of my new subsetted data
sub2 = sub1.copy()

# recode missing values to python missing (NaN)
sub2['S3AQ3B1']=sub2['S3AQ3B1'].replace(9, numpy.nan)
sub2['S3AQ3C1']=sub2['S3AQ3C1'].replace(99, numpy.nan)

#recoding values for S3AQ3B1 into a new variable, USFREQMO
recode1 = {1: 30, 2: 22, 3: 14, 4: 6, 5: 2.5, 6: 1}
sub2['USFREQMO']= sub2['S3AQ3B1'].map(recode1)
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:1: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  if __name__ == '__main__':
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:3: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  app.launch_new_instance()
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:4: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:5: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
In [43]:
# contingency table of observed counts
ct1=pandas.crosstab(sub2['TABP12MDX'], sub2['USFREQMO'])
print (ct1)

# column percentages
colsum=ct1.sum(axis=0)
colpct=ct1/colsum
print(colpct)

# chi-square
print ('chi-square value, p value, expected counts')
cs1= scipy.stats.chi2_contingency(ct1)
print (cs1)
USFREQMO  1.0   2.5   6.0   14.0  22.0  30.0
TAB12MDX                                    
0           64    53    69    59    41   521
1            7    12    19    32    27   799
USFREQMO      1.0       2.5       6.0       14.0      22.0      30.0
TAB12MDX                                                            
0         0.901408  0.815385  0.784091  0.648352  0.602941  0.394697
1         0.098592  0.184615  0.215909  0.351648  0.397059  0.605303
chi-square value, p value, expected counts
(165.27320708055845, 7.4363642083905987e-34, 5, array([[  33.64474457,   30.80152672,   41.70052848,   43.1221374 ,
          32.22313564,  625.50792719],
       [  37.35525543,   34.19847328,   46.29947152,   47.8778626 ,
          35.77686436,  694.49207281]]))
In [45]:
# set variable types 
sub2["USFREQMO"] = sub2["USFREQMO"].astype('category')
# new code for setting variables to numeric:
sub2['TAB12MDX'] = pandas.to_numeric(sub2['TABP12MDX'], errors='coerce')

# old code for setting variables to numeric:
#sub2['TAB12MDX'] = sub2['TAB12MDX'].convert_objects(convert_numeric=True)

# graph percent with nicotine dependence within each smoking frequency group 
seaborn.factorplot(x="USFREQMO", y="TAB12MDX", data=sub2, kind="bar", ci=None)
plt.xlabel('Days smoked per month')
plt.ylabel('Proportion Nicotine Dependent')

recode2 = {1: 1, 2.5: 2.5}
sub2['COMP1v2']= sub2['USFREQMO'].map(recode2)

# contingency table of observed counts
ct2=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP1v2'])
print (ct2)

# column percentages
colsum=ct2.sum(axis=0)
colpct=ct2/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs2= scipy.stats.chi2_contingency(ct2)
print (cs2)

recode3 = {1: 1, 6: 6}
sub2['COMP1v6']= sub2['USFREQMO'].map(recode3)

# contingency table of observed counts
ct3=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP1v6'])
print (ct3)

# column percentages
colsum=ct3.sum(axis=0)
colpct=ct3/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs3= scipy.stats.chi2_contingency(ct3)
print (cs3)

recode4 = {1: 1, 14: 14}
sub2['COMP1v14']= sub2['USFREQMO'].map(recode4)

# contingency table of observed counts
ct4=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP1v14'])
print (ct4)

# column percentages
colsum=ct4.sum(axis=0)
colpct=ct4/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs4= scipy.stats.chi2_contingency(ct4)
print (cs4)

recode5 = {1: 1, 22: 22}
sub2['COMP1v22']= sub2['USFREQMO'].map(recode5)

# contingency table of observed counts
ct5=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP1v22'])
print (ct5)

# column percentages
colsum=ct5.sum(axis=0)
colpct=ct5/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs5= scipy.stats.chi2_contingency(ct5)
print (cs5)

recode6 = {1: 1, 30: 30}
sub2['COMP1v30']= sub2['USFREQMO'].map(recode6)

# contingency table of observed counts
ct6=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP1v30'])
print (ct6)

# column percentages
colsum=ct6.sum(axis=0)
colpct=ct6/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs6= scipy.stats.chi2_contingency(ct6)
print (cs6)

recode7 = {2.5: 2.5, 6: 6}
sub2['COMP2v6']= sub2['USFREQMO'].map(recode7)

# contingency table of observed counts
ct7=pandas.crosstab(sub2['TAB12MDX'], sub2['COMP2v6'])
print (ct7)

# column percentages
colsum=ct7.sum(axis=0)
colpct=ct7/colsum
print(colpct)

print ('chi-square value, p value, expected counts')
cs7=scipy.stats.chi2_contingency(ct7)
print (cs7)
COMP1v2   1.0  2.5
TAB12MDX          
0          55   49
1          16   16
COMP1v2        1.0       2.5
TAB12MDX                    
0         0.774648  0.753846
1         0.225352  0.246154
chi-square value, p value, expected counts
(0.0069422451870988916, 0.93359698915826184, 1, array([[ 54.29411765,  49.70588235],
       [ 16.70588235,  15.29411765]]))
COMP1v6    1   6
TAB12MDX        
0         55  71
1         16  17
COMP1v6          1         6
TAB12MDX                    
0         0.774648  0.806818
1         0.225352  0.193182
chi-square value, p value, expected counts
(0.090349171475131884, 0.76373372487916535, 1, array([[ 56.26415094,  69.73584906],
       [ 14.73584906,  18.26415094]]))
COMP1v14  1   14
TAB12MDX        
0         55  64
1         16  27
COMP1v14        1         14
TAB12MDX                    
0         0.774648  0.703297
1         0.225352  0.296703
chi-square value, p value, expected counts
(0.70756615276814527, 0.40025297614002775, 1, array([[ 52.15432099,  66.84567901],
       [ 18.84567901,  24.15432099]]))
COMP1v22  1   22
TAB12MDX        
0         55  41
1         16  27
COMP1v22        1         22
TAB12MDX                    
0         0.774648  0.602941
1         0.225352  0.397059
chi-square value, p value, expected counts
(4.0231238896127559, 0.044880510850748734, 1, array([[ 49.03597122,  46.96402878],
       [ 21.96402878,  21.03597122]]))
COMP1v30  1    30
TAB12MDX         
0         55  624
1         16  696
COMP1v30        1         30
TAB12MDX                    
0         0.774648  0.472727
1         0.225352  0.527273
chi-square value, p value, expected counts
(23.387240486657827, 1.3245419870769008e-06, 1, array([[  34.65780014,  644.34219986],
       [  36.34219986,  675.65780014]]))
COMP2v6   2.5  6.0
TAB12MDX          
0          49   71
1          16   17
COMP2v6        2.5       6.0
TAB12MDX                    
0         0.753846  0.806818
1         0.246154  0.193182
chi-square value, p value, expected counts
(0.34652664097266372, 0.55608593058763378, 1, array([[ 50.98039216,  69.01960784],
       [ 14.01960784,  18.98039216]]))

No comments: