The data were downloaded from the NCCT website into the directory DAT_DIR.
%load_ext autoreload
%autoreload 2
%load_ext sql
import os
import time
import numpy as np
import matplotlib.text as text
import scipy.interpolate as interp
import pandas as pd
#from mp.txpepa import *
#from bio.data.toxplorer import *
#import bio.hts.apredica as apr
#from bio.hts.htsdb import *
#from bio.data.toxplorer import *
#import viz.clust as cv
#from chem.clust import *
from sklearn import (manifold, datasets, decomposition, ensemble, lda,
random_projection)
from sklearn.metrics.pairwise import euclidean_distances,manhattan_distances
import statsmodels.api as sm
import numpy.linalg as LA
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
stats = importr('stats')
from sklearn.neighbors import KNeighborsClassifier
from genra.readacross import *
import mongoengine as mng  # assumed: `mng` refers to mongoengine, used to register the MongoDB connections below
mng.register_connection("hts-db","htsdb",username="ishah",
                        password="xxx",host='localhost')
mng.register_connection("txp-db","toxplorerdb",username="ishah",
                        password="xxx",host='localhost')
#%sql postgresql://ishah:xxx@localhost/chemicals
#CD = ChemDrawing()
DAT_DIR = '/share/home/ishah/projects/Chem/data/tables/'
PKL_DIR = '/share/home/ishah/projects/Chem/data/pickle/'
import pickle
tmstmp = time.strftime("%m-%d-%Y",time.localtime())
import zipfile
DAT_DIR = '/share/home/ishah/projects/ToxCast/data/TX14/'
FD = {}
# Map each filename inside the downloaded zip archives to the archive that contains it
for F in os.listdir(DAT_DIR):
    print '\n>',F,'\n'
    ZF1 = zipfile.ZipFile(DAT_DIR+F,'r')
    print "\n\t".join([i.filename for i in ZF1.filelist])
    for i in ZF1.filelist:
        fn = i.filename
        #if fn.find('/')>-1:
        #    fn = fn.split('/')[-1]
        #if not fn: continue
        FD[fn]=F
def loadTXdata(df,fd=FD,read_fn=pd.read_csv,dat_dir=DAT_DIR):
    # Look up which archive holds file `df`, extract it, and parse it with `read_fn`
    zf = fd[df]
    ZF1 = zipfile.ZipFile(dat_dir+zf,'r')
    d = ZF1.extract(df)
    return read_fn(d)
#Assay
A0 = loadTXdata( 'Assay_Summary_141121.csv')
A0.set_index(['aenm','aeid','acid','assay_source_name'],inplace=True)
#Chemicals
C0 = loadTXdata('TOX21S_v4b_8599_23Oct2014.xlsx',read_fn=pd.read_excel)
C0['TS_CASRN']=C0.TS_CASRN.apply(lambda x: x.replace("'",""))
C0['ID'] = C0.TS_CASRN.apply(lambda x: 'C'+x.replace('-',''))
C1 = C0[['ID','DSSTox_GSID','TS_CASRN','TS_ChemName']]
C1 = C1.rename(columns={'TS_CASRN':'chemical_casrn','TS_ChemName':'chemical_name'})
C0.set_index('ID',inplace=True)
#Bioactivity
B0= loadTXdata('AllResults_modl_ga_Matrix_141121.csv')
B0.rename(columns=({'Unnamed: 0':'ID'}),inplace=True)
B0.set_index('ID',inplace=True)
B1= loadTXdata('AllResults_modl_la_Matrix_141121.csv')
B1.rename(columns=({'Unnamed: 0':'ID'}),inplace=True)
B1.set_index('ID',inplace=True)
# What was tested
Bt = loadTXdata('AllResults_tested_Matrix_141121.csv')
Bt.rename(columns=({'Unnamed: 0':'ID'}),inplace=True)
Bt.set_index('ID',inplace=True)
# Everything that is null is inactive - replace nulls with a very high concentration
B0[B0.isnull()]=6
B1[B1.isnull()]=6
# Everything that was not tested is set back to null
B0[Bt==0]=None
B1[Bt==0]=None
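As a sanity check, here is a minimal sketch of the fill-then-mask step above on toy data; b stands in for a potency matrix and t for the tested-indicator matrix (toy values only).
import numpy as np
import pandas as pd

b = pd.DataFrame([[4.5, np.nan],
                  [np.nan, 5.0]], columns=['assay1', 'assay2'])   # toy potencies
t = pd.DataFrame([[1, 1],
                  [0, 1]], columns=['assay1', 'assay2'])          # 1 = tested, 0 = not tested

b[b.isnull()] = 6     # tested but no hit -> inactive ceiling of 6
b[t == 0] = None      # never tested -> back to missing
# row 0: [4.5, 6.0]; row 1: [NaN, 5.0]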
B0 = pd.merge(C1,B0.reset_index(),left_on='ID',right_on='ID')
B0.set_index(['ID','DSSTox_GSID','chemical_casrn','chemical_name'],inplace=True)
# Tox21 data
T21= loadTXdata('ToxCast_Tox21_Level5&6_20141022.csv')
T21.rename(columns=dict(spid='sample_id',casn='casrn',chnm='chemical_name',code='ID',aenm='assay_name'),inplace=True)
T21b = pd.pivot_table(T21,index=['sample_id','ID','casrn','chemical_name'],
columns='assay_name',values='hitc')
T21p = pd.pivot_table(T21,index=['sample_id','ID','casrn','chemical_name'],
columns='assay_name',values='hill_ga')
pickle.dump([A0,C0,C1,B1],file(PKL_DIR+'chm-bio-'+tmstmp+'.pkl','w'))
# Toxicity
T1 = loadTXdata('toxrefdb/toxrefdb_study_tg_effect_endpoint_AUG2014_FOR_PUBLIC_RELEASE.csv')
T1.drop('Unnamed: 0',axis=1,inplace=True)
# Create ID
T1['ID']=T1.chemical_casrn.apply(lambda x: 'C'+x.replace('/','-').replace('-',''))
# DSSTox_GSID
T1['DSSTox_GSID'] = T1.chemical_id.apply(lambda x: x.split('_')[-1] if 'CAS' not in x else None)  # inline equivalent of the project-specific ifthen() helper
T1.shape
T11 = T1.set_index('ID')   # indexed view of T1 used for the per-chemical pivot further below
Ph1 = T1[(T1['data_source']=='opp_der')].ID.unique()
len(Ph1)
T1.columns
T1[['chemical_id','chemical_name','study_id','source_study_numeric_id', u'citation']].ix[:50]
# Effect -> lesion type: map each effect name to its lesion category, read from tags of the form 'les_cat:<category>'
# (Entity is presumably a MongoDB document class from the project modules commented out in the imports above)
Les_Cat = dict(( (E.name.lower(),[t.replace('les_cat:','').lower() for t in E.tags][0])
                 for E in Entity.objects(tags__istartswith='les_cat:',name__exists=1) ))
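For illustration, a minimal sketch of the tag convention assumed by the dictionary above, using hypothetical records in place of the Entity documents.
# Hypothetical Entity-like records: each has a name and a 'les_cat:<category>' tag.
toy_entities = [
    {'name': 'Hepatocellular carcinoma', 'tags': ['les_cat:neoplasia']},
    {'name': 'Hypertrophy',              'tags': ['les_cat:non-neoplastic']},
]
les_cat_toy = dict((e['name'].lower(),
                    [t.replace('les_cat:', '').lower() for t in e['tags']][0])
                   for e in toy_entities)
# les_cat_toy == {'hepatocellular carcinoma': 'neoplasia', 'hypertrophy': 'non-neoplastic'}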
#len(T1.effect_desc.unique())
#Les_Cat.items()[:20]
# Add to dict
Les_Cat['abnormal lobation']='other'
Les_Cat['leukemia lymphocytic']='neoplasia'
Les_Cat['carcinoma nos'] = 'neoplasia'
Les_Cat['mixed tumor malignant'] = 'neoplasia'
T1.effect_desc[pd.isnull(T1.effect_desc)]=''
T1['les_cat']=T1.effect_desc.apply(lambda x: Les_Cat.get(x.lower()))
[i.lower() for i in T1.study_type.unique()]
CAS_rn = Chemical.objects(tags='css_rn').distinct('casrn')   # Chemical is presumably a MongoDB document class from the project modules commented out above
len(CAS_rn)
CAS_rn[:10]
len(set(T1.chemical_casrn.unique()).intersection(CAS_rn))
I1 = T1.chemical_casrn.apply(lambda i: i in CAS_rn)
I2 = T1.study_type.apply(lambda i: i.lower() in ['chr', 'sub', 'acu'] )
I3 = T1.species.apply(lambda i: i.lower()=='rat')
I4 = np.logical_and(np.logical_and(I1,I2), I3)
sum(I4)
TS1= T1.ix[T1.index[I4],
['DSSTox_GSID','chemical_id', u'chemical_casrn', u'chemical_name',
'study_type', u'species', u'strain','admin_method', u'admin_route',
'dose', u'toxrefdb_tg_dose_unit', u'duration', u'duration_unit','loael']
].drop_duplicates()
TS1.shape
I5 = np.logical_and(I4,T1.chemical_casrn=='4151-50-2')
T1.ix[T1.index[I5],['DSSTox_GSID','chemical_id', u'chemical_casrn', u'chemical_name',
'study_type', u'species', u'strain','admin_method', u'admin_route',
'dose', u'toxrefdb_tg_dose_unit', u'duration', u'duration_unit']]
I5 = T1.chemical_casrn=='4151-50-2'
T1.ix[T1.index[I5],['DSSTox_GSID','chemical_id', u'chemical_casrn', u'chemical_name',
'study_type', u'species', u'strain','admin_method', u'admin_route',
'dose', u'toxrefdb_tg_dose_unit', u'duration', u'duration_unit']]
TS11 = pd.pivot_table(TS1,
index=['DSSTox_GSID','chemical_id', u'chemical_casrn', u'chemical_name',
'study_type','dose','duration','duration_unit'],
columns='toxrefdb_tg_dose_unit',
values='admin_route',
aggfunc=len)
TS11.shape
xl = pd.ExcelWriter('/share/home/ishah/tmp/toxref-rat-css-treatment-doses-v2.xlsx')
TS11.to_excel(xl,sheet_name='view')
TS11.reset_index().to_excel(xl,sheet_name='data')
xl.close()
T1.loael.unique()
# Figure out the maximum treatment concentration for each study type - this is the treatment
# concentration up to which no effect was observed
TF = [['study_type']
]
# If a chemical has an effect in a study then all other specific effects that are NA will be set to zero
from ml.mlearn import concat_df
def mk_str(x):
    if type(x)==tuple:
        return '_'.join([i.lower().replace('-','_').replace(' ','_') for i in x])
    elif type(x)==str:
        return x.lower().replace('-','_').replace(' ','_')
    else:
        return x
T_mt = pd.DataFrame()
for c_i in TF:
    T_i = pd.pivot_table(T1,index=['ID','DSSTox_GSID','chemical_name'],
                         columns=c_i,
                         values='dose',
                         aggfunc=np.min)
    #T_i.columns = mk_str([mk_str(jj) for jj in T_i.columns])
    if T_mt.shape[0]>0:
        T_mt = pd.merge(T_mt,T_i,how='outer',left_index=True,right_index=True)
    else:
        T_mt = T_i
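To make the pivot concrete, a toy sketch of what aggfunc=np.min produces here: the smallest value of the dose column per chemical and study type (hypothetical doses).
import numpy as np
import pandas as pd

# Hypothetical rows in the toxrefdb-style long table: one row per treatment-group effect.
t_toy = pd.DataFrame({'ID':         ['C1', 'C1', 'C1', 'C2'],
                      'study_type': ['chr', 'chr', 'sub', 'chr'],
                      'dose':       [10.0, 100.0, 30.0, 5.0]})
t_min = pd.pivot_table(t_toy, index='ID', columns='study_type',
                       values='dose', aggfunc=np.min)
# study_type   chr   sub
# C1          10.0  30.0
# C2           5.0   NaN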
#I = [i for i in T2.columns if re.search('chr.+liver',i,re.I)]
#I
# Toxicity -> factors
TF = [#['study_type'],
#['species','study_type','effect_target'],
# ['study_type','effect_target'],
['study_type','species','effect_target','les_cat'],
#['study_type','species','effect_target'],
#['study_type','species','effect_target','les_cat']
]
# If a chemical has an effect in a study then all other specific effects that are NA will be set to zero
T2 = pd.DataFrame()
for c_i in TF:
    T_i = pd.pivot_table(T1,index=['ID','DSSTox_GSID','chemical_name'],
                         columns=c_i,
                         values='dose',
                         aggfunc=np.min)
    #T_i.columns = mk_str([mk_str(jj) for jj in T_i.columns])
    if T2.shape[0]>0:
        T2 = pd.merge(T2,T_i,how='outer',left_index=True,right_index=True)
    else:
        T2 = T_i
#I = [i for i in T2.columns if re.search('chr.+liver',i,re.I)]
#I
Toxic effects that are not significant are not reported, which produces a great deal of missing data, so we need an approach that differentiates unknown effects from effects that were tested but not significant. We assume that if a particular guideline study was conducted for a chemical but an effect was not reported, then that chemical is negative for that effect in that study type. However, this ignores the influence of species and sex.
for study in set([i[0] for i in T2.columns]):
    Yij = T2[(study)]
    I = Yij.isnull()
    # chemicals with at least one reported effect for this study type
    I1 = I.sum(axis=1) < Yij.shape[1]
    # for those chemicals, unreported effects are assumed negative
    I2 = I.apply(lambda y: y & I1)
    T2[(study)][I2] = 0
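A toy illustration of this assumption: a chemical with any reported effect for a study type gets zeros for its other, unreported effects, while chemicals with no reported effects at all stay missing (hypothetical effect columns).
import numpy as np
import pandas as pd

# Rows = chemicals, columns = effects within one study type (toy values).
y_toy = pd.DataFrame({'liver':  [10.0, np.nan, np.nan],
                      'kidney': [np.nan, 25.0, np.nan]},
                     index=['chemA', 'chemB', 'chemC'])
tested = y_toy.isnull().sum(axis=1) < y_toy.shape[1]   # chemA, chemB True; chemC False
y_toy[y_toy.isnull().apply(lambda col: col & tested)] = 0
# chemA/chemB: missing effects -> 0; chemC: still all NaN (never studied)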
T2.shape
I = [i for i in T2.columns if i[1]=='rat']
Rat_liver=T2[I]
pickle.dump(Rat_liver,file(PKL_DIR+'rat-liver-effects-'+tmstmp+'.pkl','w'))
pickle.dump([T2,T_mt],file(PKL_DIR+'tox-'+tmstmp+'.pkl','w'))
PKL_DIR
CID=[u'C335762',
u'C307244',
u'C375951',
u'C1763231',
u'C335671',
u'C4151502',
u'C375859',
u'C2795393',
u'C29420493',
u'C3825261',
u'C3871996',
u'C754916',
u'C2058948']
#C0.STRUCTURE_MW[CID]
C0.ix[CID].T
Y1 = T2.ix[CID]
#Y1[Y1.notnull()]
I=Y1.notnull().sum()>1
Y1 = T2.ix[CID,I]
#Y1.apply(lambda x: Y1.columns[x==x.min()][0],axis=0)
Y1.T
I = np.logical_not(np.all(np.logical_or(Y1.isnull(),Y1==0),axis=0))
Y1.ix[:,I].T
for study in set([i[0] for i in Y1.columns]):
    Yij = Y1[(study)]
    I = Yij.isnull()
    I1 = I.sum(axis=1) < Yij.shape[1]
    I2 = I.apply(lambda y: y & I1)
    Y1[(study)][I2] = 0
# Scale doses by molecular weight (dose/(1000*MW)) and take log10; CID selects the MW entries for these chemicals
X1 = np.log10(Y1.apply(lambda x: x/(1000*C0.STRUCTURE_MW[CID]),axis=0)).T
X1[np.isinf(X1)]=0
X1
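For reference, a sketch of the unit conversion implied by dose/(1000*MW), assuming doses in mg/kg and STRUCTURE_MW in g/mol; the units are not stated explicitly in the source.
import numpy as np

dose_mg_per_kg = 50.0      # hypothetical administered dose
mw_g_per_mol = 250.0       # hypothetical molecular weight
# mg/kg divided by g/mol gives mmol/kg; the extra factor of 1000 gives mol/kg
dose_mol_per_kg = dose_mg_per_kg / (1000.0 * mw_g_per_mol)
log_dose = np.log10(dose_mol_per_kg)   # ~ -3.7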
Liv = [i for i in T2.columns if i[2].startswith('Liver')]   # effect_target is the third element of the column tuple
T2.ix[:20,Liv]
pd.pivot_table(T11.ix['C101200480'],columns=['study_type','species'],index='effect_target',values='dose')
pickle.dump([T2,T_mt],file(PKL_DIR+'tox-'+tmstmp+'.pkl','w'))
# Chemical fingerprints
from rdkit import Chem
from rdkit.DataStructs import *
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import AllChem
from rdkit.Chem.SaltRemover import SaltRemover
df = 'TOX21S_v4b_CID_structures.sdf'
zf = FD[df]
ZF1 = zipfile.ZipFile(DAT_DIR+zf,'r')
d = ZF1.extract(df)
suppl = Chem.SDMolSupplier(d)
# Map DSSTox_CID to ID
CID2ID = dict(zip(C0.DSSTox_CID,C0.index))
MOLS = {}
# Keep only molecules whose DSSTox_CID maps to a chemical ID in C0
for m in suppl:
    if not m: continue
    if 'DSSTox_CID' not in m.GetPropNames():
        continue
    k = CID2ID.get(int(m.GetProp('DSSTox_CID')))
    if not k: continue
    MOLS[k] = m
from rdkit.Chem import MACCSkeys
FP1 = pd.DataFrame([np.array(AllChem.GetMorganFingerprintAsBitVect(i,3,1024)) for i in MOLS.values()])
FP1.index=MOLS.keys()
FP1.columns = ['mrgn_%d'%i for i in FP1.columns]
FP2 = pd.DataFrame([np.array(MACCSkeys.FingerprintMol(i)) for i in MOLS.values()])
FP2.index=MOLS.keys()
FP2.columns = ['mccs_%d'%i for i in FP2.columns]
FP3 = pd.DataFrame([np.array(AllChem.GetHashedTopologicalTorsionFingerprintAsBitVect(i)) for i in MOLS.values()])
FP3.index=MOLS.keys()
FP3.columns = ['tptr_%d'%i for i in FP3.columns]
FP0 = pd.merge(FP1,FP2,left_index=True,right_index=True)
FP0 = pd.merge(FP0,FP3,left_index=True,right_index=True)
FP0.index.names=['ID']
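Since these fingerprints feed the read-across workflow (see the genra.readacross import above), a minimal sketch of how pairwise chemical similarity could be computed from the Morgan bits; the Jaccard/Tanimoto choice here is an assumption, not prescribed by the source.
from scipy.spatial.distance import pdist, squareform
import pandas as pd

# Tanimoto (Jaccard) similarity between chemicals from the binary Morgan bits;
# FP1 rows are 0/1 vectors indexed by chemical ID.
D = squareform(pdist(FP1.values.astype(bool), metric='jaccard'))   # Jaccard distance
S = pd.DataFrame(1.0 - D, index=FP1.index, columns=FP1.index)      # similarity
# S.ix['C4151502'].order(ascending=False)[:10] would list the nearest analogues (old pandas API)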
#pickle.dump([C0,FP0,FP1,FP2,FP3],file(PKL_DIR+'chm-'+tmstmp+'.pkl','w'))
pickle.dump(MOLS,file(PKL_DIR+'mols-'+tmstmp+'.pkl','w'))
DAT_DIR = '/share/home/ishah/projects/Chem/data/tables/'
if False:
    T2.to_csv(DAT_DIR+'tox-v1.csv')
    FP0.to_csv(DAT_DIR+'chmfp-v1.csv')
    B1.to_csv(DAT_DIR+'bio-v1.csv')
    T_mt.to_csv(DAT_DIR+'tox-max-trt-v1.csv')
    W = pd.ExcelWriter(DAT_DIR+'chm-v1.xlsx')
    C1.to_excel(W,sheet_name='All')
    W.save()
[i for i in T2.ix[:10,:10].columns]
T3 = T2.copy()
T3.columns = [i[0].lower() +'_'+i[1].lower().replace(' ','_') for i in T3.columns]
Tox = T3.columns   # flattened toxicity column names (strings) used for the BCTb lookups below
Bio=B0.columns
Chm=FP0.columns
X0 = B0.copy()
X0[X0<6]=1
X0[X0==6]=0
BCb = pd.merge(X0.reset_index(),FP0.reset_index(),how='outer',left_on='ID',right_on='ID')
BCTb= pd.merge(BCb,T3.reset_index().drop(['DSSTox_GSID','chemical_name'],axis=1),how='outer',
left_on='ID',right_on='ID')
BCTb= BCTb.set_index(['ID']).drop(['chemical_name','chemical_casrn','DSSTox_GSID'],axis=1)
BCb = BCb.set_index(['ID']).drop(['chemical_name','chemical_casrn','DSSTox_GSID'],axis=1)
print 'All',BCTb.shape
print 'Bio & Chm',BCb.shape
BCTb.ix[CID,Tox[-10:]]
X0 = B0.copy()
X0[X0<6]=1
X0[X0==6]=0
BCc = pd.merge(X0.reset_index(),FP0.reset_index(),how='inner',left_on='ID',right_on='ID')
BCTc= pd.merge(BCc,T3.reset_index().drop(['DSSTox_GSID','chemical_name'],axis=1),how='inner',
left_on='ID',right_on='ID')
BCc = BCc.set_index('ID').drop(['DSSTox_GSID','chemical_casrn','chemical_name'],axis=1)
BCTc= BCTc.set_index('ID').drop(['DSSTox_GSID','chemical_casrn','chemical_name'],axis=1)
BCc.shape,BCTc.shape
tmstmp
pickle.dump([BCc,BCTc,BCTb,Bio,Chm,Tox],file(PKL_DIR+'tx-tr-ch-'+tmstmp+'.pkl','w'))
pickle.dump(BCTb,file(PKL_DIR+'BCTb-'+tmstmp+'.pkl','w'))
BCTb.to_csv(DAT_DIR+'BCTb-'+tmstmp+'.csv')
os.listdir(DAT_DIR)
print "\n".join(os.listdir(PKL_DIR))
[BCc,BCTc,Bio,Chm,Tox] = pickle.load(file(PKL_DIR+'tx-tr-ch-02-12-2015.pkl','r'))
[A0,C0,C1,B1] = pickle.load(file(PKL_DIR+'chm-bio-02-12-2015.pkl','r'))
[T2,T_mt] = pickle.load(file(PKL_DIR+'tox-02-12-2015.pkl','r'))