%load_ext autoreload
%autoreload 2
%load_ext sql 
%pylab inline
%matplotlib inline

import matplotlib.text as text
from IPython.html.widgets import FloatProgress
from IPython.display import display
import scipy.interpolate as interp
import pandas as pd
#from mp.txpepa import *
from bio.data.toxplorer import *
import bio.hts.apredica as apr
from bio.hts.htsdb import *
from bio.data.toxplorer import *
import viz.clust as cv
from chem.clust import *
from sklearn import (manifold, datasets, decomposition, ensemble, lda,
                     random_projection)
from sklearn.metrics.pairwise import euclidean_distances,manhattan_distances
import statsmodels.api as sm
import numpy.linalg as LA
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
stats = importr('stats')
from sklearn.neighbors import KNeighborsClassifier
from genra.readacross import *

mng.register_connection("hts-db","htsdb",username="ishah",
                        password="xxx",host='localhost')
mng.register_connection("txp-db","toxplorerdb",username="ishah",
                        password="xxx",host='localhost')

%sql postgresql://ishah:xxx@localhost/chemicals

DAT_DIR = '/share/home/ishah/projects/Chem/data/tables/'
PKL_DIR = '/share/home/ishah/projects/Chem/data/pickle/'
RES_DIR='/share/home/ishah/projects/Chem/data/results/'
FIG_DIR='/share/home/ishah/projects/Chem/figs/readacross/'

import pickle

tmstmp = time.strftime("%m-%d-%Y",time.localtime())

Populating the interactive namespace from numpy and matplotlib

WARNING: pylab import has clobbered these variables: ['connect', 'copy', 'flatten']
`%matplotlib` prevents importing * from pylab and numpy

Initialization¶

# Start the parallel machine
from ml.mlearn import *
from ml.readacross import *
import IPython.parallel as PP
%reload_ext autoreload
%autoreload 2

lb_view=None
d_view =None

def initParallel(Data=None):
    """Connect to the IPython parallel cluster and prepare the shared views.

    Creates a client for the 'galaxy_parallel' profile and rebinds the
    module-level ``d_view`` (a view over all engines, ``client[:]``) and
    ``lb_view`` (the client's load-balanced view). ``block`` is set to True
    on both views. The project modules are then imported on every engine.

    Data -- optional object handed to ``d_view.push``; skipped when falsy.
            Other cells in this file push a dict of name -> object.
    """
    global d_view, lb_view

    client = PP.Client(profile='galaxy_parallel')

    # View over every engine
    d_view = client[:]
    d_view.block = True

    # Load-balanced view used for the map() calls
    lb_view = client.load_balanced_view()
    lb_view.block = True

    # Set up the namespace on the engines
    d_view.execute("""
%load_ext autoreload
%autoreload 2
import ml.mlearn as ml
from ml.mlearn import *
from ml.readacross import *
from chem.clust import *
    """)

    if Data:
        d_view.push(Data)

Load the data from pickles¶

print "\n".join([i for i in os.listdir(PKL_DIR) if i.startswith('tx-tr')])
[BCc,BCTc,BCTb,Bio,Chm,Tox] = pd.read_pickle(PKL_DIR+'tx-tr-ch-02-08-2016.pkl')

tx-tr-ch-02-08-2016.pkl
tx-tr-ch-11-16-2015.pkl
tx-tr-ch-02-12-2015.pkl

#print "\n".join(os.listdir(PKL_DIR))
[A0,C0,C1,B1] = pd.read_pickle(PKL_DIR+'chm-bio-02-12-2015.pkl')
MOLS = pd.read_pickle(PKL_DIR+'mols-02-12-2015.pkl')
[C2M,M2C,Cl_st,C2] = pd.read_pickle(PKL_DIR+'clust-02-12-2015.pkl')
C2 = C1.set_index('ID')

# Chemical fingerprints

from rdkit import Chem
from rdkit.DataStructs import *
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import AllChem


FP1 = pd.DataFrame([np.array(AllChem.GetMorganFingerprintAsBitVect(i,3,1024)) for i in MOLS.values()])
FP1.index=MOLS.keys()
FP1.columns = ['mrgn_%d'%i for i in FP1.columns]

FP2 = pd.DataFrame([np.array(MACCSkeys.FingerprintMol(i)) for i in MOLS.values()])
FP2.index=MOLS.keys()
FP2.columns = ['mccs_%d'%i for i in FP2.columns]

FP3 = pd.DataFrame([np.array(AllChem.GetHashedTopologicalTorsionFingerprintAsBitVect(i)) for i in MOLS.values()])
FP3.index=MOLS.keys()
FP3.columns = ['tptr_%d'%i for i in FP3.columns]

FP0 = pd.merge(FP1,FP2,left_index=True,right_index=True)
FP0 = pd.merge(FP0,FP3,left_index=True,right_index=True)
FP0.index.names=['ID']

#Distance calculations

# Distance calculations
# make a binary rep
X = BCTb[Bio]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_bio = pd.DataFrame(squareform(pdist(X,'jaccard')),
                     columns=CID,index=CID)
S_bio = 1-D_bio

S_bio.shape

(8642, 8642)

X = BCTb[Chm]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_chm = pd.DataFrame(squareform(pdist(X,'jaccard')),
                     columns=CID,index=CID)
S_chm = 1-D_chm

X = BCTb[Bio+Chm]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_bc = pd.DataFrame(squareform(pdist(X,'jaccard')),
                    columns=CID,index=CID)
S_bc = 1-D_bc

S_bc.shape

(8642, 8642)

ChmNm1 = C1[['ID','chemical_name']]
ChmNm1.set_index('ID',inplace=True)
STUDIES = ['mgr', 'chr', 'sac', 'sub', 'dev', 'rep', 'oth', 'acu', 'neu', 'dnt']

#pd.merge(C2.ix[CID],BCTb.ix[CID,STUDIES],left_index=True,right_index=True)

Search chemicals by clusterid

R= C1.ix[C1.chemical_name.apply(lambda i: i.lower().find("propiconazole")>-1)]
print R
C2M['C94361065']

len(Tox)

574

#BCTb0 = BCTb.copy()
BCTb[BCTb>0]=1

Nearest neighbour viz

CD = ChemDrawing()

cl = 5
CID = M2C[cl]

T0 = BCTb.ix[CID,Tox]
Y = T0.apply(lambda y: np.sum(y==0)>1 and np.sum(y>0)>0,axis=0)
Tox1 = Y[Y].index

print "> CL ",cl,'n=',len(CID),'Tox=',len(Tox1),time.strftime("%H:%M",time.localtime())
sys.stdout.flush()
sys.stderr.flush()

X_cl=BCTb.ix[CID,Tox1]
S_chm_cl=S_chm.ix[CID,:]
S_bio_cl=S_bio.ix[CID,:]
S_bc_cl=S_bc.ix[CID,:]

PERF = []
for t0 in Tox1:
    print '   ', t0
    #P = ClActivityFromNN(t0,X_cl,S_chm_cl,sim_type='chm',ret='perf',
    #                     k0=3,s0=0.2,perm=0,wt=True)
    P = GenRAPerf(t0,X_cl,S_bio_cl,sim_type='bio',ret='perf',
                         k0=3,s0=0.2,perm=0,wt=True)
    
    PERF.append(P)

> CL  5 n= 29 Tox= 65 11:05
    chr_bone_marrow
    chr_clinical_chemistry
    chr_clinical_signs
    chr_eye
    chr_food_consumption
    chr_heart
    chr_hematology
    chr_kidney
    chr_mortality
    chr_nerve
    chr_ovary
    chr_parathyroid_gland
    chr_pituitary_gland
    chr_spleen
    chr_testes
    chr_thyroid_gland
    chr_uncertain_primary_site
    chr_urinalysis
    chr_uterus
    dev_bone
    dev_clinical_chemistry
    dev_clinical_signs
    dev_food_consumption
    dev_maternal_wastage
    dev_mortality
    dev_reproductive_performance
    dev_spleen
    mgr_adrenal_gland
    mgr_brain
    mgr_clinical_chemistry
    mgr_clinical_signs
    mgr_epididymis
    mgr_food_consumption
    mgr_heart
    mgr_kidney
    mgr_liver
    mgr_mortality
    mgr_offspring_survival-early
    mgr_offspring_survival-late
    mgr_pituitary_gland
    mgr_reproductive_performance
    mgr_sexual_developmental_landmark
    mgr_spleen
    mgr_testes
    sub_body_weight
    sub_bone_marrow
    sub_brain
    sub_clinical_signs
    sub_estrous_cycle
    sub_eye
    sub_food_consumption
    sub_harderian_gland
    sub_heart
    sub_kidney
    sub_lung
    sub_mortality
    sub_other
    sub_ovary
    sub_salivary_glands
    sub_spleen
    sub_stomach
    sub_testes
    sub_thymus
    sub_thyroid_gland
    sub_urinalysis

PERF_df = pd.DataFrame(PERF)
PERF_df.t0_max.hist(bins=20)

<matplotlib.axes.AxesSubplot at 0xa595a90>

d_view.execute("""
%load_ext autoreload
%autoreload 2
import ml.mlearn as ml
from ml.mlearn import *
from chem.ra.readacross import *
import chem.ra.readacross as genra
from chem.clust import *
""")
from chem.ra.readacross import *
import chem.ra.readacross as genra

RA prediction of toxicity -- binary¶

from chem.ra.readacross import *
P_ALL = []
print "Starting GenRA Analysis to assign activity " + time.strftime("%d/%m/%Y %H:%M",time.localtime())

initParallel()

from IPython.html.widgets import FloatProgress
from IPython.display import display

N = len(M2C)
# Number of clusters per 1% tick of the progress bar. Clamp to at least 1:
# with fewer than 100 clusters the integer division yields 0 and the
# `i_cl % pc` test in the loop would raise ZeroDivisionError.
pc = max(1, N // 100)

pbar = FloatProgress(min=0, max=100,description='GenRA Perf')
display(pbar)

i_cl =0
for cl,CID in M2C.iteritems():
    i_cl+=1
    if (i_cl % pc) == 0: 
        pbar.value += 1
    
    if len(CID)<2: continue
    #Tox1=STUDIES1[np.where(BCTb.ix[CID,STUDIES1].notnull().sum()>0)]
    T0 = BCTb.ix[CID,Tox]
    Y = T0.apply(lambda y: np.sum(y==0)>0 and np.sum(y>0)>0,axis=0)
    Tox1 = Y[Y].index

    if len(Tox1)<1: continue
    print "> CL ",cl,'n=',len(CID),'Tox=',len(Tox1),time.strftime("%H:%M",time.localtime())
    sys.stdout.flush()
    sys.stderr.flush()
    
    X_cl=BCTb.ix[CID,Tox1]
    S_chm_cl=S_chm.ix[CID,:]
    S_bio_cl=S_bio.ix[CID,:]
    S_bc_cl=S_bc.ix[CID,:]
    #k0 = len(CID)
    
    print "  Broadcasting data .."
    d_view.push(dict(X_cl=X_cl,S_bio_cl=S_bio_cl,S_chm_cl=S_chm_cl,S_bc_cl=S_bc_cl,Tox1=Tox1))
    
    SK0 = []
    L  = ifthen(len(CID)<10, len(CID), 10)
    for ba in Tox1:
        for s0 in np.linspace(1,0,num=11)[1:]:
            for k0 in range(1,L):
                SK0.append((ba,s0,k0))
            
    
    print "  Chm %d" % len(SK0)
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_chm_cl,sim_type='chm',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)
    
    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)


    print "  Bio" 
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_bio_cl,sim_type='bio',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)

    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)

    print "  BC"
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_bc_cl,sim_type='bc',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)
    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)
    
P_ALL.to_csv(RES_DIR+ time.strftime("genra-pred-tox-cl-nn-%Y-%m-%d-%H%M.csv",time.localtime()),index=False)

pbar.value = 100
#send_email(txt="Done",subj="ActivityFromNN bio")

Starting GenRA Analysis to assign activity 02/03/2016 11:10
> CL  0 n= 13 Tox= 67 11:10
  Broadcasting data ..
  Chm 6030
  Bio
  BC
> CL  1 n= 47 Tox= 122 11:12
  Broadcasting data ..
  Chm 10980
  Bio
  BC
> CL  2 n= 12 Tox= 22 11:15
  Broadcasting data ..
  Chm 1980
  Bio
  BC
> CL  3 n= 22 Tox= 21 11:15
  Broadcasting data ..
  Chm 1890
  Bio
  BC
> CL  4 n= 6 Tox= 22 11:16
  Broadcasting data ..
  Chm 1100
  Bio
  BC
> CL  5 n= 29 Tox= 78 11:16
  Broadcasting data ..
  Chm 7020
  Bio
  BC
> CL  6 n= 23 Tox= 88 11:18
  Broadcasting data ..
  Chm 7920
  Bio
  BC
> CL  7 n= 34 Tox= 93 11:20
  Broadcasting data ..
  Chm 8370
  Bio
  BC
> CL  10 n= 35 Tox= 75 11:22
  Broadcasting data ..
  Chm 6750
  Bio
  BC
> CL  11 n= 22 Tox= 102 11:25
  Broadcasting data ..
  Chm 9180
  Bio
  BC
> CL  12 n= 13 Tox= 91 11:27
  Broadcasting data ..
  Chm 8190
  Bio
  BC
> CL  13 n= 2 Tox= 48 11:30
  Broadcasting data ..
  Chm 480
  Bio
  BC
> CL  16 n= 12 Tox= 72 11:30
  Broadcasting data ..
  Chm 6480
  Bio
  BC
> CL  17 n= 30 Tox= 12 11:32
  Broadcasting data ..
  Chm 1080
  Bio
  BC
> CL  19 n= 117 Tox= 165 11:32
  Broadcasting data ..
  Chm 14850
  Bio
  BC
> CL  20 n= 4 Tox= 45 11:37
  Broadcasting data ..
  Chm 1350
  Bio
  BC
> CL  21 n= 51 Tox= 107 11:38
  Broadcasting data ..
  Chm 9630
  Bio
  BC
> CL  23 n= 16 Tox= 107 11:41
  Broadcasting data ..
  Chm 9630
  Bio
  BC
> CL  24 n= 35 Tox= 83 11:44
  Broadcasting data ..
  Chm 7470
  Bio
  BC
> CL  25 n= 62 Tox= 96 11:46
  Broadcasting data ..
  Chm 8640
  Bio
  BC
> CL  27 n= 2 Tox= 39 11:49
  Broadcasting data ..
  Chm 390
  Bio
  BC
> CL  28 n= 25 Tox= 57 11:49
  Broadcasting data ..
  Chm 5130
  Bio
  BC
> CL  29 n= 26 Tox= 92 11:50
  Broadcasting data ..
  Chm 8280
  Bio
  BC
> CL  31 n= 80 Tox= 164 11:53
  Broadcasting data ..
  Chm 14760
  Bio
  BC
> CL  33 n= 9 Tox= 89 11:58
  Broadcasting data ..
  Chm 7120
  Bio
  BC
> CL  34 n= 3 Tox= 30 12:00
  Broadcasting data ..
  Chm 600
  Bio
  BC
> CL  35 n= 369 Tox= 341 12:00
  Broadcasting data ..
  Chm 30690
  Bio
  BC
> CL  36 n= 23 Tox= 3 12:24
  Broadcasting data ..
  Chm 270
  Bio
  BC
> CL  37 n= 4 Tox= 66 12:24
  Broadcasting data ..
  Chm 1980
  Bio
  BC
> CL  41 n= 4 Tox= 2 12:24
  Broadcasting data ..
  Chm 60
  Bio
  BC
> CL  42 n= 19 Tox= 52 12:24
  Broadcasting data ..
  Chm 4680
  Bio
  BC
> CL  44 n= 5 Tox= 29 12:25
  Broadcasting data ..
  Chm 1160
  Bio
  BC
> CL  46 n= 16 Tox= 29 12:26
  Broadcasting data ..
  Chm 2610
  Bio
  BC
> CL  48 n= 2 Tox= 42 12:26
  Broadcasting data ..
  Chm 420
  Bio
  BC
> CL  49 n= 5 Tox= 21 12:26
  Broadcasting data ..
  Chm 840
  Bio
  BC
> CL  52 n= 4 Tox= 13 12:26
  Broadcasting data ..
  Chm 390
  Bio
  BC
> CL  53 n= 5 Tox= 46 12:27
  Broadcasting data ..
  Chm 1840
  Bio
  BC
> CL  55 n= 25 Tox= 131 12:27
  Broadcasting data ..
  Chm 11790
  Bio
  BC
> CL  56 n= 6 Tox= 58 12:31
  Broadcasting data ..
  Chm 2900
  Bio
  BC
> CL  58 n= 2 Tox= 26 12:32
  Broadcasting data ..
  Chm 260
  Bio
  BC
> CL  59 n= 37 Tox= 114 12:32
  Broadcasting data ..
  Chm 10260
  Bio
  BC
> CL  60 n= 3 Tox= 24 12:35
  Broadcasting data ..
  Chm 480
  Bio
  BC
> CL  61 n= 4 Tox= 24 12:35
  Broadcasting data ..
  Chm 720
  Bio
  BC
> CL  63 n= 4 Tox= 4 12:36
  Broadcasting data ..
  Chm 120
  Bio
  BC
> CL  67 n= 67 Tox= 131 12:36
  Broadcasting data ..
  Chm 11790
  Bio
  BC
> CL  68 n= 7 Tox= 14 12:39
  Broadcasting data ..
  Chm 840
  Bio
  BC
> CL  70 n= 3 Tox= 43 12:40
  Broadcasting data ..
  Chm 860
  Bio
  BC
> CL  74 n= 51 Tox= 75 12:40
  Broadcasting data ..
  Chm 6750
  Bio
  BC
> CL  75 n= 34 Tox= 145 12:42
  Broadcasting data ..
  Chm 13050
  Bio
  BC
> CL  76 n= 18 Tox= 115 12:46
  Broadcasting data ..
  Chm 10350
  Bio
  BC
> CL  77 n= 15 Tox= 87 12:50
  Broadcasting data ..
  Chm 7830
  Bio
  BC
> CL  78 n= 6 Tox= 63 12:52
  Broadcasting data ..
  Chm 3150
  Bio
  BC
> CL  79 n= 13 Tox= 98 12:53
  Broadcasting data ..
  Chm 8820
  Bio
  BC
> CL  80 n= 41 Tox= 149 12:55
  Broadcasting data ..
  Chm 13410
  Bio
  BC
> CL  83 n= 3 Tox= 11 13:00
  Broadcasting data ..
  Chm 220
  Bio
  BC
> CL  85 n= 7 Tox= 77 13:00
  Broadcasting data ..
  Chm 4620
  Bio
  BC
> CL  86 n= 13 Tox= 1 13:02
  Broadcasting data ..
  Chm 90
  Bio
  BC
> CL  88 n= 34 Tox= 112 13:02
  Broadcasting data ..
  Chm 10080
  Bio
  BC
> CL  96 n= 13 Tox= 81 13:05
  Broadcasting data ..
  Chm 7290
  Bio
  BC
> CL  97 n= 8 Tox= 83 13:07
  Broadcasting data ..
  Chm 5810
  Bio
  BC
> CL  98 n= 32 Tox= 18 13:08
  Broadcasting data ..
  Chm 1620
  Bio
  BC

P_ALL = pd.read_csv(RES_DIR+'genra-pred-tox-cl-nn-2016-03-02-1309.csv')

P_ALL.t0_max.hist(bins=20)

<matplotlib.axes.AxesSubplot at 0xf912b50>

N = dict(((i,len(v)) for i,v in M2C.iteritems()))

P_ALL['cl_n'] = P_ALL.cl.apply(lambda i: N[i])
P_ALL['n_chem']=P_ALL.n_sim_pos+P_ALL.n_sim_neg
P_ALL[(P_ALL['n_chem']>1)].shape

(517379, 19)

P_ALL1 = P_ALL.set_index(['cl','sim_type','effect','s0','k0','n_chem'])

# How many clusters were analyzed?
len(set([i[0] for i in P_ALL1.index]))

61

Get a summary of all datasets¶

def mksig(p):
    """Return a significance marker for the p-value ``p``.

    '**' when p <= 0.01, '*' when 0.01 < p <= 0.1, and '' otherwise
    (including NaN and +/-inf).
    """
    if not isfinite(p):
        return ''
    # Test the stricter threshold first; checking p<=0.1 first would make
    # the '**' case unreachable.
    if p <= 0.01:
        return '**'
    if p <= 0.1:
        return '*'
    return ''
        

# Summarise every (cluster, cluster size, similarity type, effect) group
# across all rows of P_ALL1 (one row per s0/k0/n_chem setting).
G=P_ALL1.reset_index().groupby(['cl','cl_n','sim_type','effect'])

PS0 = G.aggregate(dict(auc=np.max,auc_pval=np.min,ba_max=np.max,t0_max=np.median,n_neg=np.max,n_pos=np.max))

# res1 = "<max AUC> <significance> (<n_pos>+,<n_neg>-)".
# The columns are addressed by name: the aggregate spec is a dict, so the
# column order of PS0 is not guaranteed and positional access (i[2], i[4],
# ...) can silently pick the wrong field.
PS0['res1'] = PS0.apply(lambda r: "%3.2f %s (%d+,%d-)" % (r['auc'],mksig(r['auc_pval']),r['n_pos'],r['n_neg']),axis=1)
# Blank the label where no AUC is available (single .loc assignment
# instead of chained indexing).
PS0.loc[pd.isnull(PS0.auc),'res1']=''

PS0 = PS0.reset_index()
# Study type is the prefix of the effect name, e.g. 'chr' in 'chr_liver'
PS0['tox']=PS0.effect.apply(lambda i: i.split('_')[0])
PS0['effect'] = PS0.effect.apply(lambda i: i.replace('[not_in_list]','other'))

PS0.ix[:10]

Measure Performance across neighbourhood as volume under surface¶

list(P_ALL1.index)[:10]

[(0, 'chm', 'chr_adrenal_gland', 0.3, 1, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 2, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 3, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 4, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 5, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 6, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 7, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 8, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 9, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.2, 1, 2)]

P_ALL['tox']    = P_ALL.effect.apply(lambda i: i.split('_')[0])
P_ALL['effect'] = P_ALL.effect.apply(lambda i: i.replace('[not_in_list]','other'))

from scipy.interpolate import *
from IPython.html.widgets import FloatProgress
from IPython.display import display

def smoothData(xi,yi,zi,out='ij'):
    x1,y1=np.mgrid[xi.min():xi.max():50j,yi.min():yi.max():50j]
    f = bisplrep(xi,yi,zi,s=0.5)
    z1= bisplev(x1[:,0],y1[0,:],f)
    
    if out =='ij':
        return np.concatenate(x1),np.concatenate(y1),np.concatenate(z1)
    else:
        return x1,y1,z1

# Volume under the smoothed AUC surface (over n_chem x s0) for every
# (cluster, similarity type, effect) combination present in P_ALL1.
X_vol = []

# One work item per unique (cl, sim_type, effect) triple
Work = list(P_ALL1.reset_index()[['cl','sim_type','effect']].drop_duplicates().to_records(index=False))
N = len(Work)
# Work items per 1% progress tick; at least 1 so the modulo below cannot
# divide by zero when there are fewer than 100 items.
pc = max(1, N // 100)

pbar = FloatProgress(min=0, max=100,description='GenRA Perf Vol')
display(pbar)

i_cl=0
for cl,sim_type,tox in Work:
    if (i_cl % pc) == 0:
        pbar.value += 1
    i_cl+=1

    # Work already contains one entry per similarity type, so use it
    # directly. Looping over ['chm','bio','bc'] here as well recomputed
    # every surface three times and appended triplicate rows to X_vol.
    ft = sim_type
    X = P_ALL1.xs((cl,ft,tox)).reset_index()
    if (X.auc>0).sum()==0: continue
    try:
        x2,y2,z2 = smoothData(X.n_chem, X.s0, X.auc,out='xy')
        z2[z2>1]=1
        v1 = z2.sum()             # total volume
        v2 = z2[z2>0.5].sum()     # volume from grid cells with AUC > 0.5
        (x,y) = z2.shape
        # Normalise by the number of grid cells
        vf1 = v1/(x*y)
        vf2 = v2/(x*y)
    except Exception:
        # Best effort: a combination whose surface cannot be computed is
        # skipped. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate.
        continue
    X_vol.append(dict(cl=cl,tox=tox,sim_type=ft,auc_vol1=v1,auc_vol2=v2,auc_volf1=vf1,auc_volf2=vf2))

if len(X_vol)>0:
    PERF_vol=pd.pivot_table(pd.DataFrame(X_vol),
                            index=['cl','tox'],columns='sim_type',values='auc_volf1')
    PERF_vol[PERF_vol>1]=1

pbar.value=100

X=P_ALL1.xs((cl,ft)).reset_index()
X.effect.unique()
#PERF_vol=pd.pivot_table(pd.DataFrame(X_vol),
#                        index=['cl','tox'],columns='sim_type',values='auc_volf1')
#PERF_vol[PERF_vol>1]=1

V0 = pd.DataFrame(X_vol)
V0.rename(columns=dict(tox='effect'),inplace=True)
V0['tox']=V0.effect.apply(lambda i: i.split('_')[0])
V0['effect'] = V0.effect.apply(lambda i: i.replace('[not_in_list]','other'))
PERF_vol=pd.pivot_table(V0,index=['cl','tox','effect'],columns='sim_type',values='auc_volf1')
#PERF_vol[['auc_volf1']].ix[:10]

Merge ROC/AUC and VUS¶

"""
PERF0 = pd.merge(PS0,V0,left_on=['cl','tox','effect','sim_type'],right_on=['cl','tox','effect','sim_type'],how='outer')
PERF0.auc_volf1.fillna(0,inplace=True)
# concatentation: VUS_volf1, ROC AUC, pval, n+,n-
#PERF0['res2'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-)" % (i[2],mksig(i[4]),i[5],i[0]),axis=1)
PERF0['res2'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-,%d)" % (i['auc_volf1'],i['auc'],mksig(i['auc_pval']),i['n_pos'],i['n_neg'],i['cl_n']),axis=1)
PERF0['n_perf'] = PERF0.n_pos+PERF0.n_neg
#PERF0.res2[PERF0.index[pd.isnull(PERF0.VUS_vol)]]=''

PERF1 = pd.pivot_table(PERF0,index=['cl','tox','effect','cl_n','n_perf'],columns='sim_type',values=['auc_volf1','auc','auc_pval','res2'],
                       aggfunc=lambda i: np.unique(i)[0])

"""

'\nPERF0 = pd.merge(PS0,V0,left_on=[\'cl\',\'tox\',\'effect\',\'sim_type\'],right_on=[\'cl\',\'tox\',\'effect\',\'sim_type\'],how=\'outer\')\nPERF0.auc_volf1.fillna(0,inplace=True)\n# concatentation: VUS_volf1, ROC AUC, pval, n+,n-\n#PERF0[\'res2\'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-)" % (i[2],mksig(i[4]),i[5],i[0]),axis=1)\nPERF0[\'res2\'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-,%d)" % (i[\'auc_volf1\'],i[\'auc\'],mksig(i[\'auc_pval\']),i[\'n_pos\'],i[\'n_neg\'],i[\'cl_n\']),axis=1)\nPERF0[\'n_perf\'] = PERF0.n_pos+PERF0.n_neg\n#PERF0.res2[PERF0.index[pd.isnull(PERF0.VUS_vol)]]=\'\'\n\nPERF1 = pd.pivot_table(PERF0,index=[\'cl\',\'tox\',\'effect\',\'cl_n\',\'n_perf\'],columns=\'sim_type\',values=[\'auc_volf1\',\'auc\',\'auc_pval\',\'res2\'],\n                       aggfunc=lambda i: np.unique(i)[0])\n\n'

# Clusters of interest
x=[0,13,22,6,29,12]
# Keep the rows whose cluster id (first index level) is in x. The lambda
# argument must not be called x: that shadows the list and turns the test
# into "first element of the index tuple is in the tuple", which is always
# true.
P1_vol = PERF_vol.select(lambda idx: idx[0] in x)
P1_vol.ix[np.where((P1_vol>0.75).sum(axis=1)>0)]

X1=PERF_vol.xs(80)
#tox1=list(X1.ix[X1.index[(X1>0.9).any(axis=1)]].index)
#X1.ix[X1.index[(X1>0.7).any(axis=1)]]
X1.ix[X1.index[(X1>0.75).any(axis=1)]]
tox1=list(X1.ix[X1.index[(X1>0.75).any(axis=1)]].index)
tox1=list(X1.index[(X1>0.75).any(axis=1)])

['chr_brain',
 'mgr_body_weight',
 'mgr_bone',
 'mgr_intestine_large',
 'mgr_urinary_bladder',
 'sub_intestine_small',
 'sub_prostate',
 'sub_spleen',
 'sub_urinalysis',
 'sub_water_consumption']

Summarize performance for all cluster, descriptors and outcomes¶

def mkcol(x):
    """Flatten a multi-level column label into a single string.

    x -- sequence of level names, e.g. ('auc', 'mn') or ('AUC', '').

    The first level is always kept; every further non-empty level is
    appended with '_' as separator:
        ('auc', 'mn')   -> 'auc_mn'
        ('AUC', '')     -> 'AUC'
        ('a', 'b', 'c') -> 'a_b_c'
    Every level is treated the same, so labels with three or more levels no
    longer lose their last level or their separators.
    """
    parts = [x[0]] + [lvl for lvl in x[1:] if lvl != '']
    return '_'.join(parts)

# Mean and standard deviation of each metric per
# (cluster, study type, effect, similarity type, n_pos, n_neg, cluster size).
G=P_ALL.groupby(['cl','tox','effect','sim_type','n_pos','n_neg','cl_n'])
P0=G.aggregate(dict(auc=dict(mn=np.mean,sd=np.std),t0_max=dict(mn=np.mean,sd=np.std),
                    sn_max=dict(mn=np.mean,sd=np.std),sp_max=dict(mn=np.mean,sd=np.std)))

# "mean(sd)" label. The sub-columns are read by name because they come from
# a dict and have no guaranteed order.
fmt_mn_sd = lambda r: "%3.2f(%1.1f)" % (r['mn'],r['sd'])

# Format every row: a leftover `.ix[:10]` preview slice used to leave these
# columns NaN for all rows outside the slice.
P0['AUC']=P0['auc'].apply(fmt_mn_sd,axis=1)
P0['Spec']=P0['sp_max'].apply(fmt_mn_sd,axis=1)
P0['Sens']=P0['sn_max'].apply(fmt_mn_sd,axis=1)
P0['GenRA0']=P0['t0_max'].apply(fmt_mn_sd,axis=1)

#P0[['AUC','Sens','Spec','y0']].ix[:10]

P0[('t0_max')].mn.hist()
P_ALL.t0_max.hist()

<matplotlib.axes.AxesSubplot at 0x4ec62ad0>

#pl.scatter(P0[('AUC','mn')])
P0.columns

MultiIndex(levels=[[u'sp_max', u't0_max', u'auc', u'sn_max', u'AUC', u'Spec', u'Sens', u'GenRA0'], [u'mn', u'sd', u'']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7], [0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2]])

VUS=PERF_vol.copy()
VUS[VUS>1]=1
#VUS_sig=VUS.ix[(VUS>0.4).any(axis=1)]
#VUS_sig.ix[:10]

VUS.bc.hist(bins=20)

<matplotlib.axes.AxesSubplot at 0xf933150>

Performance comparison VUS¶

B = VUS.copy()
B[B<0.5]=0
B[B>=0.5]=1
VUS_summary= B.copy()
VUS_summary['Any bio|chm|bc'] = (B>0).any(axis=1)
VUS_summary['Only chm'] = B.apply(lambda x: x[1]==0 and x[0]==0 and x[2]==1,axis=1)
VUS_summary['Only bio'] = B.apply(lambda x: x[1]==1 and x[0]==0 and x[2]==0,axis=1)
VUS_summary['Only bc'] = B.apply(lambda x: x[1]==0 and x[0]==1 and x[2]==0,axis=1)
VUS_summary['chm>bio|bc'] = VUS.apply(lambda x: x[2]>=0.5 and x[2]>x[1] and x[2]>x[0],axis=1)
VUS_summary['bio>chm|bc'] = VUS.apply(lambda x: x[1]>=0.5 and x[1]>x[2] and x[1]>x[0],axis=1)
VUS_summary['bc>chm|bio'] = VUS.apply(lambda x: x[0]>=0.5 and x[0]>x[2] and x[0]>x[1],axis=1)
#VUS_summary=VUS_summary.ix[:,3:]
#Just to keep count
VUS_summary['n']=1
VUS_summary=VUS_summary[['n',  u'chm',u'bio',u'bc', u'Any bio|chm|bc', u'Only chm', u'Only bio',
       u'Only bc', u'chm>bio|bc', u'bio>chm|bc', u'bc>chm|bio']]
VUS_summary.sum(axis=0)

sim_type
n                 3665
chm                924
bio                856
bc                 770
Any bio|chm|bc    1378
Only chm           244
Only bio           224
Only bc             47
chm>bio|bc         567
bio>chm|bc         538
bc>chm|bio         217
dtype: float64

VUS_summary.ix[:2]
#len(set([i[0] for i in VUS_summary.index]))

VUS_summary.groupby(level=[1]).aggregate(np.sum)

P2.ix[:10]

Performance comparison AUC¶

PERF_auc=pd.pivot_table(P2.reset_index(),index=['cl','tox','effect'],columns='sim_type',values='auc_mn')

PERF_auc.ix[:3]

B = PERF_auc.copy()
auc0=0.75
B[B<auc0]=0
B[B>=auc0]=1
AUC_summary= B.copy()
#AUC_summary['Only chm'] = B.apply(lambda x: x[1]==0 and x[0]==0 and x[2]==1,axis=1)
#AUC_summary['Only bio'] = B.apply(lambda x: x[1]==1 and x[0]==0 and x[2]==0,axis=1)
#AUC_summary['Only bc'] = B.apply(lambda x: x[1]==0 and x[0]==1 and x[2]==0,axis=1)
AUC_summary['chm>bio|bc'] = PERF_auc.apply(lambda x: x[2]>=auc0 and x[2]>x[1] and x[2]>x[0],axis=1)
AUC_summary['bio>chm|bc'] = PERF_auc.apply(lambda x: x[1]>=auc0 and x[1]>x[2] and x[1]>x[0],axis=1)
AUC_summary['bc>chm|bio'] = PERF_auc.apply(lambda x: x[0]>=auc0 and x[0]>x[2] and x[0]>x[1],axis=1)
AUC_summary['Any'] = (B>0).any(axis=1)
#VUS_summary=VUS_summary.ix[:,3:]
#Just to keep count
AUC_summary['n']=1
AUC_summary=AUC_summary[['n',  u'chm',u'bio',u'bc', 
                         #u'Only chm', u'Only bio',u'Only bc', 
                         u'chm>bio|bc', u'bio>chm|bc', u'bc>chm|bio',
                         'Any']]

AUC_RES = AUC_summary.groupby(level=[1]).aggregate(np.sum)
AUC_RES.ix['All'] = AUC_summary.sum(axis=0)
AUC_RES

#AUC_RES['P%']=100.0*AUC_RES.Any/AUC_RES.n
#AUC_RES.ix['P%']=100.0*AUC_RES.ix['All'] /AUC_RES.n.ix['All'] 
AUC_RES

AUC_RES_P=AUC_RES.apply(lambda x: 100.0*x/AUC_RES.n)
AUC_RES_P=np.round(AUC_RES_P,decimals=0)

AUC_RES_P.sort('Any',ascending=False)

X = np.round(AUC_RES,decimals=0).copy()
for c in X.columns: X[c] = X[c].astype(np.uint)
for c in X.columns: X[c] = X[c].astype(np.str)

Y = np.round(AUC_RES_P,decimals=0).copy()
for c in Y.columns: Y[c] = Y[c].astype(np.uint)
for c in Y.columns: Y[c] = Y[c].astype(np.str)
Z = X+ ' / ' + Y + '%'
Z['n']=X.n
Z

CL_vus10={}
for (cl,tox),X in VUS_sig.groupby(level=[0,1]):
    #print cl,tox,X.shape
    #X.sort(['chm','bio','bc'],ascending=False,inplace=True)
    CL_vus10[cl]=[i[2] for i in X.index]

#VUS_sig=VUS.ix[(VUS>0.50).any(axis=1)]
X1=pd.melt(VUS.reset_index(),id_vars=['cl','tox','effect'],value_vars=['chm','bio','bc'],var_name='sim_type',value_name='VUS')
X1.set_index(['cl','tox','effect','sim_type'],inplace=True)

X2=P0[['auc','AUC','Sens','Spec','GenRA0']].reset_index().set_index(['cl','tox','effect','sim_type'])
X2.columns=map(mkcol,X2.columns)
P2 = pd.merge(X2,X1,left_index=True,right_index=True)
P2['tested_n']=P2.n_pos+P2.n_neg
P2['untested_n']=P2.cl_n-P2.tested_n
P21 = P2[['cl_n','untested_n','tested_n','n_pos','n_neg','VUS','AUC','Sens','Spec']]

P21.ix[:10]
#P0.ix[:10]
#P0[['auc','AUC','Sens','Spec','GenRA0']].reset_index().set_index(['cl','tox','effect','sim_type']).ix[:10]
P2.ix[:10]

#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Write one worksheet per cluster.
# The openpyxl engine produces the xlsx format, so the file is named .xlsx
# (an xlsx payload under a .xls name triggers format warnings/errors when
# opened). RES_DIR already ends with '/', as in the other RES_DIR+... uses,
# so no extra separator is added.
W = pd.ExcelWriter(RES_DIR+'genra-perf-summary-full-%s.xlsx' % tmstmp,engine='openpyxl')
for cl,P in P2.groupby(level=[0]):
    P.to_excel(W,sheet_name="Cluster-%d" %cl,na_rep='')
W.save()

Figures and Results¶

C2.ix[C2.index[C2.chemical_name.apply(lambda x: x.lower().find('benox')>-1)]]

C2M['C98730042']

80

# Cluster 0 
X1=PERF_vol.xs(cc_id)
#len([i[1] for i in X11.index])
#X1.tox.unique(),len(X1.tox.unique()),len(X1.effect.unique())
X1

VUS_sig=VUS.ix[(VUS>0.5).any(axis=1)]
tox1=[i[1] for i in VUS_sig.xs(cc_id).index]
#tox1
tox1=[i for i in tox1 if re.match('chr_|dev_',i)]
tox1

['chr_brain',
 'chr_clinical_chemistry',
 'chr_clinical_signs',
 'chr_food_consumption',
 'chr_heart',
 'chr_hematology',
 'chr_kidney',
 'chr_liver',
 'chr_lung',
 'chr_ovary',
 'chr_spleen',
 'chr_testes',
 'chr_thymus',
 'chr_urinalysis',
 'dev_body_weight',
 'dev_bone',
 'dev_clinical_signs',
 'dev_food_consumption',
 'dev_kidney',
 'dev_liver',
 'dev_maternal_wastage',
 'dev_mortality',
 'dev_mouth_/_jaw',
 'dev_reproductive_performance']

#CL 23
tox1=['chr_brain',
 'chr_heart',
 'chr_kidney',
 'chr_liver',
 'chr_lung',
 'chr_ovary',
 'chr_spleen',
 'chr_testes',
 'chr_thymus',
 'dev_bone',
 'dev_kidney',
 'dev_liver',
 'dev_reproductive_performance']

#cl 80
tox1="""chr_brain
chr_kidney
mgr_body_weight
mgr_bone
mgr_intestine_large
mgr_urinary_bladder
sub_intestine_small
sub_prostate
sub_spleen
sub_urinalysis
sub_water_consumption""".split("\n")
tox1

['chr_brain',
 'chr_kidney',
 'mgr_body_weight',
 'mgr_bone',
 'mgr_intestine_large',
 'mgr_urinary_bladder',
 'sub_intestine_small',
 'sub_prostate',
 'sub_spleen',
 'sub_urinalysis',
 'sub_water_consumption']

X=(BCTb.ix[CID,tox1]>0).sum(axis=1)
X.sort(ascending=False)
X[:10]
CID.index(X.index[1])

309

tox1=['dev_food_consumption',
 'dev_sexual_developmental_landmark',
 'dev_sperm_morphology',
 'mgr_brain',
 'mgr_motor_activity',
 'sub_lung',
 'sub_sperm_morphology',
 'sub_testes',
 'sub_thymus'
 ]

# Cl = 5
tox1 = """dev_clinical_chemistry
dev_mortality
dev_reproductive_performance
dev_spleen""".split('\n')
tox1

['dev_clinical_chemistry',
 'dev_mortality',
 'dev_reproductive_performance',
 'dev_spleen']

', '.join(tox1)

'chr_body_weight, dev_food_consumption, dev_ureter, mgr_sexual_developmental_landmark, mgr_water_consumption, sub_estrous_cycle, sub_eye, sub_nose, sub_preputial_gland, sub_skeletal_muscle, sub_sperm_measure, sub_tongue, sub_urinalysis'

FIG_DIR

'/share/home/ishah/projects/Chem/figs/readacross/'

"""
dev_food_consumption                 0.319472
dev_sexual_developmental_landmark    0.319472
dev_sperm_morphology                 0.319472
mgr_brain                            0.805017
mgr_motor_activity                   0.194983
sub_lung                             0.295062
sub_sperm_morphology                 0.295062
sub_testes                           0.295062
sub_thymus                           0.295062
"""

'\ndev_food_consumption                 0.319472\ndev_sexual_developmental_landmark    0.319472\ndev_sperm_morphology                 0.319472\nmgr_brain                            0.805017\nmgr_motor_activity                   0.194983\nsub_lung                             0.295062\nsub_sperm_morphology                 0.295062\nsub_testes                           0.295062\nsub_thymus                           0.295062\n'

CID.index('C98730042')

19

from chem.ra.viz import *
from chem.ra.readacross import *
SM0 = getNNSimMatrices(CID,BCTb,C=Chm,B=Bio)
CH0 = getNNChmNm(CID,ChmNm1)
fig = pl.figure(figsize=(15,15))
ax = pl.subplot(1,1,1)
X,E1 = vizChmNN(CID[19],BCTb,SM0,dsc='Bio',ax=ax,c_s0=0,chem_sz=(90,60),c_knn=10,
                rs=1.0,r_min=150,
                th0=0.5*math.pi,th_tot=1.3*math.pi,
                lab_len=15,ch_fs=12,
                Effects=BCTb.ix[CID,tox1],CN=CH0,
                pred=True,t0=0.25,
                CD=CD,Mols=MOLS,
                save=True,fig_dir=FIG_DIR)
E1

chr_brain                1
chr_kidney               1
mgr_body_weight          1
mgr_bone                 1
mgr_intestine_large      0
mgr_urinary_bladder      1
sub_intestine_small      0
sub_prostate             1
sub_spleen               1
sub_urinalysis           1
sub_water_consumption    1
Name: C98730042, dtype: float64

from chem.ra.viz import *
from chem.ra.readacross import *
SM0 = getNNSimMatrices(CID,BCTb,C=Chm,B=Bio)
CH0 = getNNChmNm(CID,ChmNm1)
fig = pl.figure(figsize=(15,15))
ax = pl.subplot(1,1,1)
X,E1 = vizChmNN(CID[19],BCTb,SM0,dsc='Chm',ax=ax,c_s0=0,chem_sz=(90,60),c_knn=10,
                rs=2,r_min=150,
                th0=0.5*math.pi,th_tot=1.3*math.pi,
                lab_len=15,ch_fs=12,
                Effects=BCTb.ix[CID,tox1],CN=CH0,
                pred=True,t0=None,
                CD=CD,Mols=MOLS,
                save=True,fig_dir=FIG_DIR)
E1

chr_brain                0
chr_kidney               1
mgr_body_weight          1
mgr_bone                 0
mgr_intestine_large      0
mgr_urinary_bladder      0
sub_intestine_small      0
sub_prostate             0
sub_spleen               0
sub_urinalysis           0
sub_water_consumption    0
Name: C98730042, dtype: float64

S0 = SM0['Chm'].ix[CID[1]]
S0.sort()
S0

C4151502     0.239316
C29420493    0.260417
C2795393     0.282828
C754916      0.282828
C3871996     0.288660
C1763231     0.322581
C3825261     0.632353
C335762      0.909091
C375951      0.909091
C335671      0.909091
C2058948     0.909091
C375859      0.943396
C307244      1.000000
Name: C307244, dtype: float64

X = GenRAPred('mgr_body_weight', BCTb.ix[CID], SM0['Chm'],sim_type='chm',k0=8,s0=0.5,t0=None)

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.tri as mtri
import warnings
warnings.filterwarnings('ignore')

# Flag read by the plotting code that follows: overlay t0_max contour lines.
show_t0 = True


def smoothData(xi,yi,zi,out='ij'):
    """Smooth scattered samples onto a regular 30 x 30 grid with a bivariate spline.

    xi, yi -- sample coordinates; must provide .min() and .max()
    zi     -- sample values at those coordinates
    out    -- 'ij' (default) returns three flattened 1-D arrays (x, y, z);
              any other value returns the three 2-D grids instead

    The grid spans [xi.min(), xi.max()] x [yi.min(), yi.max()]. The surface is
    fitted with bisplrep(..., s=1) and evaluated with bisplev; both names are
    expected to be in scope already (presumably scipy.interpolate's -- confirm).
    """
    grid_x, grid_y = np.mgrid[xi.min():xi.max():30j,
                              yi.min():yi.max():30j]
    spline = bisplrep(xi, yi, zi, s=1)
    # bisplev takes the two 1-D grid axes: first column of x, first row of y.
    grid_z = bisplev(grid_x[:, 0], grid_y[0, :], spline)

    if out != 'ij':
        return grid_x, grid_y, grid_z

    # Concatenating the rows of each 2-D grid flattens it to one dimension.
    return tuple(np.concatenate(grid) for grid in (grid_x, grid_y, grid_z))
    
# Contour maps of GenRA performance for one cluster (cc_id).
# One grid row per effect in tox1, one column per similarity type.  Each panel
# shows the smoothed AUC surface as filled contours over
# n_nn = n_sim_neg + n_sim_pos (x axis, labelled 'k') and s0 (y axis,
# labelled 's'); when show_t0 is set, contour lines of t0_max are overlaid.
# The figure is saved as a PNG under FIG_DIR.
cl=cc_id
# Single-argument form prints the same text under Python 2 and Python 3.
print("> CL %s" % (cl,))
j=0
# Rows of VUS with a value above 0.6 in any column (not used further below).
VUS_sig=VUS.ix[(VUS>0.6).any(axis=1)]

n_rows = len(tox1)+1    # one extra grid row holds the shared colorbar
cf = None               # last filled-contour set drawn; stays None if no panel succeeds
failed_panels = []      # (effect, sim_type) pairs whose panel could not be drawn

fig = pl.figure(figsize=(15,4*len(tox1)))
for y in tox1:
    for ft in ['chm','bio','bc']:
        X = P_ALL1.xs((cl,ft,y)).reset_index()

        X['n_nn'] = X.n_sim_neg+X.n_sim_pos
        # Cap t0_max at 1.  .loc writes into X itself instead of relying on
        # chained indexing, which may operate on a temporary copy.
        X.loc[X.t0_max>1,'t0_max'] = 1
        j+=1

        try:
            ax = fig.add_subplot(n_rows,3, j)

            x2,y2,z2 = smoothData(X.n_nn, X.s0, X.auc,out='xy')
            # The spline can leave the valid AUC range: clamp negatives to 0
            # and values above 1 to the largest observed AUC.
            z2[z2<0]=0
            z2[z2>1]=X.auc.max()
            cf = ax.contourf(x2, y2, z2, 30,cmap=cm.coolwarm,vmin=0,vmax=1,alpha=0.9)

            x_min=X.n_nn.min()
            x_max=X.n_nn.max()
            ax.set_xlim(x_min,x_max)
            ax.set_ylim(0,X.s0.max())
            ax.set_xlabel('k',fontdict=dict(size=12))
            ax.set_ylabel('s',fontdict=dict(size=12))
            pl.setp(ax.get_xticklabels(), fontsize=8)
            pl.setp(ax.get_yticklabels(), fontsize=8)
            ax.set_title("CL-%d %s %s" % (cl,ft, y),fontdict=dict(size=14))
            if show_t0:
                x3,y3,z3 = smoothData(X.n_nn, X.s0, X.t0_max,out='xy')
                levels = np.linspace(X.t0_max.min(),X.t0_max.max(),8)
                cf2 = ax.contour(x3, y3, z3, levels,linewidths=3,cmap=cm.hot,alpha=0.8)
                pl.clabel(cf2, fmt='%2.1f', inline=1, fontsize=13)

        except Exception:
            # Best effort: a panel that cannot be drawn is left as is and the
            # loop moves on.  Catching Exception (not a bare except) still lets
            # KeyboardInterrupt and SystemExit stop the cell.
            failed_panels.append((y,ft))
            continue

if failed_panels:
    print("> CL %s: %d panel(s) skipped" % (cl,len(failed_panels)))

# Shared colorbar in the first cell of the extra row; skipped when no panel
# produced a contour set, since there is nothing to attach the colorbar to.
if cf is not None:
    ax = fig.add_subplot(n_rows,3, j+1)
    ax.set_axis_off()
    cb = pl.colorbar(cf,orientation='horizontal',ticks=np.linspace(0,1,6),shrink=1.5)
    cb.ax.set_xlabel('ROC AUC',fontdict=dict(size=12,weight='bold'))
pl.tight_layout()
pl.subplots_adjust(wspace=0.3,hspace=0.3)

fig.savefig(FIG_DIR+'genra-pred-mean-auc-k0-s0-cl-%d-nn.png' % cl,dpi=500)

> CL 35

# Show the 'res2' entries of PERF1 whose index tuple has cc_id in position 0
# and an effect from tox1 in position 2.
PERF1['res2'].select(lambda x: x[0]==cc_id and x[2] in tox1)

# Show the chm/bio/bc columns of VUS for index tuples with 0 in position 0 and
# an effect from tox1 in position 2.
# NOTE(review): position 0 is hard-coded to 0 here but compared with cc_id in
# the expression above -- confirm which is intended.
VUS.select(lambda x: x[0]==0 and x[2] in tox1)[['chm','bio','bc']]

Supplemental Data Files¶

# Directory that receives the supplemental data files (path ends with '/',
# so file names are appended by plain string concatenation below).
SUPDAT_DIR='/share/home/ishah/Dropbox/Projects/Chem/docs/RTP-RA1/SupplData/'

# Data S1 (placeholder: no export code is present for this data set)

# Data S2: the Tox columns of BCTc, written as a tab-separated file

tsv=SUPDAT_DIR+'supplemental-data-DS2-toxicity.tsv'
#W = pd.ExcelWriter(xl)
BCTc[Tox].to_csv(tsv,sep='\t')

#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Write one worksheet per cluster ("Cluster-<n>") holding that cluster's
# 'auc_volf1' column of PERF_vol; clusters are the distinct first elements of
# the PERF_vol index tuples, visited in sorted order.
W = pd.ExcelWriter(
    '/share/home/ishah/projects/Chem/data/results/genra-vus-%s-all.xls' % tmstmp,
    engine='openpyxl')
for cl in sorted(set(key[0] for key in PERF_vol.index)):
    X = PERF_vol.xs(cl)['auc_volf1']
    X.to_excel(W, sheet_name="Cluster-%d" % cl)
# Nothing reaches disk until the writer is saved.
W.save()

# Display a small slice of P21 for inspection.
# NOTE(review): with .ix, 30:32 is presumably label-based on an integer index
# and positional otherwise -- confirm against P21's index type.
P21.ix[30:32]

#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Write one worksheet per cluster ("Cluster-<n>") holding that cluster's
# 'res2' part of PERF1; clusters are the distinct first elements of the PERF1
# index tuples, visited in sorted order.
W = pd.ExcelWriter(
    '/share/home/ishah/projects/Chem/data/results/genra-perf-all-%s.xls' % tmstmp,
    engine='openpyxl')
for cl in sorted(set(key[0] for key in PERF1.index)):
    X = PERF1.xs(cl)['res2']
    X.to_excel(W, sheet_name="Cluster-%d" % cl)
# Nothing reaches disk until the writer is saved.
W.save()

# Workbook of high-AUC results: a "Summary" sheet of counts followed by one
# sheet per cluster that has at least one AUC above 0.8.
W = pd.ExcelWriter('/share/home/ishah/projects/Chem/data/results/genra-perf-sig-%s.xls' % tmstmp,engine='openpyxl')

# AUC values for the three similarity types, in the column order chm, bio, bc.
X0 = PERF1['auc'][['chm','bio','bc']]
# Per group of the first two index levels (presumably cluster and tox --
# confirm), count the rows whose AUC exceeds 0.8 in each column.
X = (X0>0.8).groupby(level=[0,1]).sum()
#X['Any bio|chm|bc'] = (X0>0).any(axis=1)
# In the lambdas below x is one row of X0; x[0], x[1], x[2] are presumably
# resolved positionally, i.e. chm, bio, bc -- confirm for the pandas in use.
# NOTE(review): these columns test >= 0.8 while the counts above test > 0.8 --
# confirm the boundary handling is intended.
# "Only <type>": that type reaches 0.8 and the other two stay below it.
X['Only chm'] = X0.apply(lambda x: x[0]>=0.8 and x[1]<0.8 and x[2]<0.8,axis=1).groupby(level=[0,1]).sum()
X['Only bio'] = X0.apply(lambda x: x[0]< 0.8 and x[1]>=0.8 and x[2]<0.8,axis=1).groupby(level=[0,1]).sum()
X['Only bc']  = X0.apply(lambda x: x[0]< 0.8 and x[1]<0.8 and x[2]>=0.8,axis=1).groupby(level=[0,1]).sum()
# "<type>>others": that type reaches 0.8 and is strictly larger than both others.
X['chm>bio|bc'] = X0.apply(lambda x: x[0]>=0.8 and x[0]>x[1] and x[0]>x[2],axis=1).groupby(level=[0,1]).sum()
X['bio>chm|bc'] = X0.apply(lambda x: x[1]>=0.8 and x[1]>x[0] and x[1]>x[2],axis=1).groupby(level=[0,1]).sum()
X['bc>chm|bio'] = X0.apply(lambda x: x[2]>=0.8 and x[2]>x[0] and x[2]>x[1],axis=1).groupby(level=[0,1]).sum()

X.to_excel(W,sheet_name="Summary")

# From here on X is rebound to the per-cluster slice of PERF1.
for cl in sorted(set([i[0] for i in PERF1.index])):
    X=PERF1.xs(cl)
    # np.where returns a 1-tuple of arrays with the positions of the rows that
    # have at least one 'auc' value above 0.8.
    I = np.where((X['auc']>0.8).sum(axis=1)>0)
    # Only clusters with at least one such row get a worksheet.
    if I and len(I[0])>0:
        # Write the chm/bio/bc 'res2' entries of the selected rows.
        X.res2[['chm','bio','bc']].ix[I].to_excel(W,sheet_name="Cluster-%d" %cl)
W.save()

	auc_vol1	auc_vol2	auc_volf1	auc_volf2	sim_type	effect	tox
0	100.040355	0.000000	0.040016	0.000000	chm	dev_bone	dev
1	131.911862	0.000000	0.052765	0.000000	bio	dev_bone	dev
2	40.673157	0.000000	0.016269	0.000000	bc	dev_bone	dev
3	100.040355	0.000000	0.040016	0.000000	chm	dev_clinical_signs	dev
4	131.911862	0.000000	0.052765	0.000000	bio	dev_clinical_signs	dev
5	40.673157	0.000000	0.016269	0.000000	bc	dev_clinical_signs	dev
6	366.058613	151.554364	0.146423	0.060622	chm	dev_food_consumption	dev
7	118.720676	0.000000	0.047488	0.000000	bio	dev_food_consumption	dev
8	57.737806	0.000000	0.023095	0.000000	bc	dev_food_consumption	dev
9	514.791060	167.875690	0.205916	0.067150	chm	dev_maternal_wastage	dev
10	500.609270	235.457789	0.200244	0.094183	bio	dev_maternal_wastage	dev

		sim_type	bc	bio	chm
cl	tox	effect
5	chr	chr_body_weight	0.617104	0.082518	0.769226
		chr_ovary	0.617104	0.082518	0.769226
		chr_parathyroid_gland	0.617104	0.082518	0.769226
		chr_pituitary_gland	0.617104	0.082518	0.769226
		chr_thyroid_gland	0.617104	0.082518	0.769226
10	chr	chr_clinical_chemistry	0.760124	0.345216	0.768459
		chr_food_consumption	0.668174	0.342450	0.753446
		chr_kidney	0.755633	0.286922	0.767791
11	chr	chr_thymus	0.613761	0.166593	0.820759
12	mgr	mgr_age_landmark	0.246632	0.192755	0.756789
		mgr_developmental_landmark	0.246632	0.192755	0.756789
		mgr_hair_growth	0.246632	0.192755	0.756789
19	chr	chr_body_weight	0.625520	0.758349	0.704801
	dev	dev_food_consumption	0.669297	0.787608	0.476795
	dev	dev_ureter	0.762750	0.735700	0.693668
	mgr	mgr_sexual_developmental_landmark	0.685706	0.808445	0.722415
	mgr	mgr_water_consumption	0.719722	0.808756	0.465403
	sub	sub_estrous_cycle	0.345597	0.818368	0.369229
		sub_eye	0.597859	0.503498	0.789788
		sub_nose	0.345597	0.818368	0.369229
		sub_preputial_gland	0.345597	0.818368	0.369229
		sub_skeletal_muscle	0.566775	0.761255	0.636861
		sub_sperm_measure	0.345597	0.818368	0.369229
		sub_tongue	0.574601	0.818328	0.580462
		sub_urinalysis	0.665631	0.672596	0.782579
21	dev	dev_hematology	0.722963	0.696837	0.787358
	sub	sub_lymph_node	0.282021	0.782465	0.276336
		sub_ovary	0.282021	0.782465	0.276336
		sub_thymus	0.282021	0.782465	0.276336
		sub_uterus	0.282021	0.782465	0.276336
...	...	...	...	...	...
75	sub	sub_liver	0.544417	0.800239	0.294044
75	sub	sub_spinal_cord	0.544417	0.800239	0.294044
76	chr	chr_brain	0.566908	0.513196	0.770841
		chr_clinical_chemistry	0.270581	0.755313	0.514739
		chr_kidney	0.270581	0.755313	0.514739
	dev	dev_heart	0.523003	0.312989	0.754124
		dev_hematology	0.280936	0.784062	0.471508
		dev_limb	0.528813	0.247416	0.779136
		dev_mortality	0.280936	0.784062	0.471508
		dev_squamosal	0.280936	0.784062	0.471508
	sub	sub_clinical_signs	0.280936	0.784062	0.471508
		sub_eye	0.618427	0.792649	0.764481
		sub_heart	0.503664	0.788795	0.673980
		sub_skin	0.528813	0.247416	0.779136
80	chr	chr_brain	0.560812	0.495356	0.768576
	mgr	mgr_body_weight	0.657462	0.690634	0.795845
		mgr_bone	0.464522	0.283251	0.775799
		mgr_intestine_large	0.657462	0.690634	0.795845
		mgr_urinary_bladder	0.653739	0.477013	0.788073
	sub	sub_intestine_small	0.462256	0.182290	0.773625
		sub_prostate	0.671432	0.535696	0.760091
		sub_spleen	0.671432	0.535696	0.760091
		sub_urinalysis	0.716573	0.410989	0.841579
		sub_water_consumption	0.671432	0.535696	0.760091
85	dev	dev_food_consumption	0.574070	0.485713	0.855638
	sub	sub_body_weight	0.465754	0.717500	0.818498
		sub_hematology	0.511686	0.490570	0.868607
		sub_kidney	0.382095	0.415409	0.842941
		sub_tissue_nos	0.465754	0.717500	0.818498
		sub_urinary_bladder	0.464212	0.317805	0.890014

				n_pos	n_neg	cl_n	auc_mn	auc_sd	AUC	Sens	Spec	GenRA0	VUS	tested_n	untested_n
cl	tox	effect	sim_type
0	dev	dev_bone	bc	2	1	13	0.062500	0.109789	0.06(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.016269	3	10
			bio	2	1	13	0.138889	0.125988	0.14(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.052765	3	10
			chm	2	1	13	0.083333	0.118958	0.08(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.040016	3	10
		dev_clinical_signs	bc	2	1	13	0.062500	0.109789	0.06(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.016269	3	10
			bio	2	1	13	0.138889	0.125988	0.14(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.052765	3	10
			chm	2	1	13	0.083333	0.118958	0.08(0.1)	0.00(0.0)	1.00(0.0)	2.00(0.0)	0.040016	3	10
		dev_food_consumption	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
		dev_maternal_wastage	bc	1	2	13	0.291667	0.437117	0.29(0.4)	0.25(0.4)	1.00(0.0)	1.24(0.2)	0.056750	3	10
			bio	1	2	13	0.527778	0.491999	0.53(0.5)	0.50(0.5)	1.00(0.0)	1.15(0.2)	0.200244	3	10
			chm	1	2	13	0.407407	0.445883	0.41(0.4)	0.33(0.5)	1.00(0.0)	1.22(0.2)	0.205916	3	10
		dev_mouth_/_jaw	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
		dev_offspring_survival-early	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
		dev_offspring_survival-late	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
		dev_reproductive_performance	bc	1	2	13	0.062500	0.109789	0.06(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.016269	3	10
			bio	1	2	13	0.138889	0.125988	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.052765	3	10
			chm	1	2	13	0.083333	0.118958	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.040016	3	10
		dev_sexual_developmental_landmark	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
		dev_sperm_morphology	bc	1	2	13	0.083333	0.119523	0.08(0.1)	0.00(0.0)	1.00(0.0)	1.76(0.2)	0.023095	3	10
			bio	1	2	13	0.125000	0.126773	0.12(0.1)	0.00(0.0)	1.00(0.0)	1.85(0.2)	0.047488	3	10
			chm	1	2	13	0.138889	0.125392	0.14(0.1)	0.00(0.0)	1.00(0.0)	1.78(0.2)	0.146423	3	10
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
10	sub	sub_skeletal_muscle	bc	1	8	35	0.702778	0.382072	0.70(0.4)	0.60(0.5)	1.00(0.0)	1.06(0.1)	0.675739	9	26
			bio	1	8	35	0.637153	0.378501	0.64(0.4)	0.50(0.5)	1.00(0.0)	1.36(0.4)	0.251533	9	26
			chm	1	8	35	0.644676	0.380429	0.64(0.4)	0.50(0.5)	1.00(0.0)	1.06(0.1)	0.570717	9	26
		sub_skin	bc	1	8	35	0.702778	0.382072	0.70(0.4)	0.60(0.5)	1.00(0.0)	1.06(0.1)	0.675739	9	26
			bio	1	8	35	0.637153	0.378501	0.64(0.4)	0.50(0.5)	1.00(0.0)	1.36(0.4)	0.251533	9	26
			chm	1	8	35	0.644676	0.380429	0.64(0.4)	0.50(0.5)	1.00(0.0)	1.06(0.1)	0.570717	9	26
		sub_spleen	bc	1	8	35	0.716667	0.376323	0.72(0.4)	0.60(0.5)	1.00(0.0)	1.04(0.1)	0.647340	9	26
			bio	1	8	35	0.677083	0.348626	0.68(0.3)	0.50(0.5)	1.00(0.0)	1.07(0.1)	0.273003	9	26
			chm	1	8	35	0.511574	0.377561	0.51(0.4)	0.33(0.5)	1.00(0.0)	1.21(0.2)	0.393840	9	26
		sub_stomach	bc	2	7	35	0.648413	0.363415	0.65(0.4)	0.57(0.4)	0.99(0.1)	0.98(0.3)	0.508003	9	26
			bio	2	7	35	0.738095	0.286224	0.74(0.3)	0.67(0.5)	0.91(0.2)	0.92(0.4)	0.274908	9	26
			chm	2	7	35	0.414021	0.349274	0.41(0.3)	0.44(0.5)	0.84(0.3)	1.13(0.6)	0.331652	9	26
		sub_testes	bc	4	5	35	0.767222	0.265048	0.77(0.3)	0.75(0.3)	0.93(0.1)	0.85(0.3)	0.637498	9	26
			bio	4	5	35	0.707639	0.312868	0.71(0.3)	0.73(0.4)	0.86(0.3)	1.05(0.6)	0.257348	9	26
			chm	4	5	35	0.664352	0.222846	0.66(0.2)	0.59(0.3)	0.98(0.1)	0.94(0.2)	0.519127	9	26
		sub_thymus	bc	3	6	35	0.623457	0.401996	0.62(0.4)	0.56(0.4)	0.99(0.0)	1.07(0.3)	0.558693	9	26
			bio	3	6	35	0.668981	0.341912	0.67(0.3)	0.63(0.5)	0.91(0.2)	0.97(0.4)	0.245487	9	26
			chm	3	6	35	0.500000	0.337500	0.50(0.3)	0.46(0.4)	0.91(0.2)	1.11(0.4)	0.440463	9	26
		sub_trachea	bc	1	8	35	0.716667	0.376323	0.72(0.4)	0.60(0.5)	1.00(0.0)	1.04(0.1)	0.647340	9	26
			bio	1	8	35	0.677083	0.348626	0.68(0.3)	0.50(0.5)	1.00(0.0)	1.07(0.1)	0.273003	9	26
			chm	1	8	35	0.511574	0.377561	0.51(0.4)	0.33(0.5)	1.00(0.0)	1.21(0.2)	0.393840	9	26
		sub_urinalysis	bc	1	8	35	0.719444	0.359930	0.72(0.4)	0.60(0.5)	1.00(0.0)	1.09(0.1)	0.681676	9	26
			bio	1	8	35	0.619792	0.404046	0.62(0.4)	0.50(0.5)	1.00(0.0)	1.13(0.2)	0.252956	9	26
			chm	1	8	35	0.780093	0.332661	0.78(0.3)	0.67(0.5)	1.00(0.0)	1.05(0.1)	0.668352	9	26
		sub_uterus	bc	2	7	35	0.873016	0.217849	0.87(0.2)	0.93(0.2)	0.91(0.2)	0.78(0.4)	0.740531	9	26
			bio	2	7	35	0.760913	0.297195	0.76(0.3)	0.92(0.3)	0.80(0.3)	0.73(0.5)	0.315979	9	26
			chm	2	7	35	0.889550	0.172507	0.89(0.2)	0.84(0.2)	0.97(0.1)	0.77(0.4)	0.670975	9	26
		sub_vagina	bc	1	8	35	0.719444	0.359930	0.72(0.4)	0.60(0.5)	1.00(0.0)	1.09(0.1)	0.681676	9	26
			bio	1	8	35	0.619792	0.404046	0.62(0.4)	0.50(0.5)	1.00(0.0)	1.13(0.2)	0.252956	9	26
			chm	1	8	35	0.780093	0.332661	0.78(0.3)	0.67(0.5)	1.00(0.0)	1.05(0.1)	0.668352	9	26

		sim_type	bc	bio	chm
cl	tox	effect
0	dev	dev_bone	0.062500	0.138889	0.083333
		dev_clinical_signs	0.062500	0.138889	0.083333
		dev_food_consumption	0.083333	0.125000	0.138889
		dev_maternal_wastage	0.291667	0.527778	0.407407
		dev_mouth_/_jaw	0.083333	0.125000	0.138889
		dev_offspring_survival-early	0.083333	0.125000	0.138889
		dev_offspring_survival-late	0.083333	0.125000	0.138889
		dev_reproductive_performance	0.062500	0.138889	0.083333
		dev_sexual_developmental_landmark	0.083333	0.125000	0.138889
		dev_sperm_morphology	0.083333	0.125000	0.138889
	mgr	mgr_adrenal_gland	0.312500	0.240741	0.506173
		mgr_age_landmark	0.041667	0.123457	0.148148
		mgr_brain	0.083333	0.388889	0.317901
		mgr_clinical_signs	0.157407	0.185185	0.436214
		mgr_developmental_landmark	0.041667	0.123457	0.148148
		mgr_food_consumption	0.312500	0.240741	0.506173
		mgr_kidney	0.041667	0.123457	0.148148
		mgr_liver	0.041667	0.123457	0.148148
		mgr_motor_activity	0.342593	0.197531	0.633745
		mgr_offspring_survival-early	0.041667	0.123457	0.148148
		mgr_offspring_survival-late	0.222222	0.703704	0.207819
		mgr_pituitary_gland	0.157407	0.185185	0.436214
		mgr_reproductive_performance	0.041667	0.123457	0.148148
		mgr_seminal_vesicle	0.222222	0.703704	0.207819
		mgr_sexual_developmental_landmark	0.041667	0.123457	0.148148
		mgr_sperm_measure	0.222222	0.703704	0.207819
		mgr_spleen	0.222222	0.703704	0.207819
		mgr_testes	0.157407	0.185185	0.436214
	sub	sub_body_weight	0.439815	0.339506	0.798354
	sub	sub_brain	0.163194	0.236111	0.405864
...	...	...	...	...	...
1	mgr	mgr_reproductive_performance	0.134259	0.194444	0.180556
		mgr_sexual_developmental_landmark	0.435185	0.722222	0.395833
		mgr_spleen	0.327160	0.688272	0.319444
		mgr_stomach	0.435185	0.722222	0.395833
		mgr_thyroid_gland	0.435185	0.722222	0.395833
	sub	sub_adrenal_gland	0.000000	0.179012	0.123457
		sub_bone_marrow	0.111111	0.469136	0.395062
		sub_clinical_signs	0.000000	0.179012	0.123457
		sub_estrous_cycle	0.055556	0.228395	0.395062
		sub_food_consumption	0.055556	0.228395	0.395062
		sub_heart	0.000000	0.179012	0.123457
		sub_hematology	0.055556	0.703704	0.123457
		sub_kidney	0.055556	0.703704	0.123457
		sub_lung	0.000000	0.179012	0.123457
		sub_mortality	0.000000	0.179012	0.123457
		sub_pancreas	0.055556	0.703704	0.123457
		sub_pituitary_gland	0.000000	0.179012	0.123457
		sub_prostate	0.000000	0.179012	0.123457
		sub_sperm_measure	0.055556	0.228395	0.395062
		sub_spleen	0.083333	0.398148	0.259259
		sub_testes	0.055556	0.703704	0.123457
		sub_thyroid_gland	0.055556	0.703704	0.123457
		sub_urinalysis	0.111111	0.469136	0.395062
		sub_urinary_bladder	0.055556	0.703704	0.123457
		sub_uterus	0.000000	0.179012	0.123457
3	sub	sub_body_weight	0.161111	0.000000	0.175926
		sub_bone_marrow	0.150000	0.500000	0.125000
		sub_clinical_signs	0.622222	0.250000	0.564815
		sub_hematology	0.150000	0.500000	0.125000
		sub_kidney	0.622222	0.250000	0.564815

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
dnt	100	19.047619	30.952381	17.857143	5.952381	23.809524	2.380952	32.142857
dev	100	8.848315	19.382022	7.022472	6.039326	16.432584	0.983146	23.455056
mgr	100	11.676647	18.862275	8.383234	6.886228	15.868263	0.598802	23.353293
sub	100	11.605206	15.184382	8.459870	6.941432	13.232104	2.277657	22.993492
All	100	9.909910	15.561016	7.425607	6.388206	13.049413	1.556102	21.130221
chr	100	8.699552	11.928251	6.008969	6.726457	9.596413	1.883408	18.206278
sac	100	1.562500	5.468750	4.687500	0.781250	4.687500	1.562500	7.031250
rep	100	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000

	cl_n	sim_type	effect	n_neg	t0_max	ba_max	auc_pval	n_pos	res1	res2	res3	tox
0	13	bc	chr_other	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
1	13	bc	chr_adrenal_gland	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
2	13	bc	chr_clinical_chemistry	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
3	13	bc	chr_clinical_signs	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
4	13	bc	chr_heart	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
5	13	bc	chr_hematology	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
6	13	bc	chr_kidney	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
7	13	bc	chr_lung	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
8	13	bc	chr_mammary_gland	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
9	13	bc	chr_other	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr
10	13	bc	chr_ovary	1	2	0.5	NaN	1	0.00/nan	0.00/nan (1+,1-)	0.00 (1+,1-)	chr

		sim_type	n	chm	bio	bc	Any bio\|chm\|bc	Only chm	Only bio	Only bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio
cl	tox	effect
0	dev	dev_bone	1	0	0	0	False	False	False	False	False	False	False
0	dev	dev_clinical_signs	1	0	0	0	False	False	False	False	False	False	False

sim_type	n	chm	bio	bc	Any bio\|chm\|bc	Only chm	Only bio	Only bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio
tox
chr	1116	300	235	211	423	92	75	11	211	148	44
dev	712	169	182	164	272	41	39	13	104	105	53
dnt	84	23	31	31	33	0	2	2	6	19	8
mgr	668	171	160	150	247	55	26	3	102	94	48
rep	34	0	0	0	0	0	0	0	0	0	0
sac	128	6	17	3	21	3	15	0	4	17	0
sub	923	255	231	211	382	53	67	18	140	155	64

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
chr	1115	97	133	67	75	107	21	203
dev	712	63	138	50	43	117	7	167
dnt	84	16	26	15	5	20	2	27
mgr	668	78	126	56	46	106	4	156
rep	34	0	0	0	0	0	0	0
sac	128	2	7	6	1	6	2	9
sub	922	107	140	78	64	122	21	212
All	3663	363	570	272	234	478	57	774

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
chr	100	9	12	6	7	10	2	18
dev	100	9	19	7	6	16	1	23
dnt	100	19	31	18	6	24	2	32
mgr	100	12	19	8	7	16	1	23
rep	100	0	0	0	0	0	0	0
sac	100	2	5	5	1	5	2	7
sub	100	12	15	8	7	13	2	23
All	100	10	16	7	6	13	2	21

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
chr	1115	97 / 9%	133 / 12%	67 / 6%	75 / 7%	107 / 10%	21 / 2%	203 / 18%
dev	712	63 / 9%	138 / 19%	50 / 7%	43 / 6%	117 / 16%	7 / 1%	167 / 23%
dnt	84	16 / 19%	26 / 31%	15 / 18%	5 / 6%	20 / 24%	2 / 2%	27 / 32%
mgr	668	78 / 12%	126 / 19%	56 / 8%	46 / 7%	106 / 16%	4 / 1%	156 / 23%
rep	34	0 / 0%	0 / 0%	0 / 0%	0 / 0%	0 / 0%	0 / 0%	0 / 0%
sac	128	2 / 2%	7 / 5%	6 / 5%	1 / 1%	6 / 5%	2 / 2%	9 / 7%
sub	922	107 / 12%	140 / 15%	78 / 8%	64 / 7%	122 / 13%	21 / 2%	212 / 23%
All	3663	363 / 10%	570 / 16%	272 / 7%	234 / 6%	478 / 13%	57 / 2%	774 / 21%

	DSSTox_GSID	chemical_casrn	chemical_name
ID
C98730042	29565	98730-04-2	Benoxacor
C5987826	45393	5987-82-6	Benoxinate hydrochloride
C7421401	45572	7421-40-1	Carbenoxolone sodium

	sim_type	bc	bio	chm
tox	effect
chr	chr_adrenal_gland	0.312603	0.375728	0.288374
	chr_bone	0.322365	0.302694	0.185011
	chr_bone_marrow	0.452572	0.451387	0.360484
	chr_brain	0.414691	0.550617	0.361929
	chr_clinical_chemistry	0.559710	0.568432	0.634867
	chr_clinical_signs	0.319269	0.548075	0.316050
	chr_eye	0.322365	0.302694	0.185011
	chr_food_consumption	0.264522	0.541604	0.252762
	chr_harderian_gland	0.322365	0.302694	0.185011
	chr_heart	0.414691	0.550617	0.361929
	chr_hematology	0.496921	0.537500	0.443887
	chr_kidney	0.357352	0.578639	0.270581
	chr_liver	0.563700	0.670874	0.526654
	chr_lung	0.363292	0.525132	0.328286
	chr_mammary_gland	0.182220	0.225096	0.105629
	chr_mortality	0.226409	0.387818	0.068247
	chr_nerve	0.322365	0.302694	0.185011
	chr_ovary	0.401035	0.504559	0.318494
	chr_pancreas	0.182220	0.225096	0.105629
	chr_pituitary_gland	0.450284	0.482156	0.325381
	chr_prostate	0.182220	0.225096	0.105629
	chr_skeletal_muscle	0.192328	0.197086	0.147820
	chr_spinal_cord	0.322365	0.302694	0.185011
	chr_spleen	0.363292	0.525132	0.328286
	chr_stomach	0.182220	0.225096	0.105629
	chr_testes	0.539835	0.485364	0.416178
	chr_thymus	0.412400	0.513296	0.372169
	chr_thyroid_gland	0.404627	0.325637	0.111829
	chr_tissue_nos	0.192328	0.197086	0.147820
	chr_uncertain_primary_site	0.182220	0.225096	0.105629
...	...	...	...	...
mgr	mgr_urinary_bladder	0.286215	0.347127	0.131438
mgr	mgr_uterus	0.178155	0.177940	0.102234
sub	sub_adrenal_gland	0.346024	0.472219	0.161727
	sub_body_weight	0.345613	0.498366	0.174434
	sub_bone_marrow	0.327716	0.265147	0.188561
	sub_brain	0.346024	0.472219	0.161727
	sub_cervix	0.152862	0.180177	0.095546
	sub_clinical_chemistry	0.686736	0.786940	0.526380
	sub_clinical_signs	0.395908	0.545049	0.202484
	sub_epididymis	0.152862	0.180177	0.095546
	sub_eye	0.327716	0.265147	0.188561
	sub_food_consumption	0.324200	0.451884	0.209787
	sub_heart	0.346024	0.472219	0.161727
	sub_hematology	0.326182	0.300658	0.244393
	sub_intestine_large	0.146224	0.221300	0.077283
	sub_kidney	0.346024	0.472219	0.161727
	sub_lung	0.346024	0.472219	0.161727
	sub_mortality	0.152862	0.180177	0.095546
	sub_ovary	0.327716	0.265147	0.188561
	sub_pituitary_gland	0.342407	0.448727	0.191974
	sub_prostate	0.152862	0.180177	0.095546
	sub_salivary_glands	0.152862	0.180177	0.095546
	sub_spleen	0.346024	0.472219	0.161727
	sub_stomach	0.262148	0.271942	0.226201
	sub_testes	0.346024	0.472219	0.161727
	sub_thymus	0.346024	0.472219	0.161727
	sub_thyroid_gland	0.272395	0.437325	0.166180
	sub_urinalysis	0.504888	0.660129	0.327908
	sub_uterus	0.152862	0.180177	0.095546
	sub_water_consumption	0.152862	0.180177	0.095546

				sim_type	bc	bio	chm
cl	tox	effect	cl_n	n_perf
35	chr	chr_body_weight	369	105	0.60 1.00 *(92+,13-,369)	0.66 1.00 *(92+,13-,369)	0.65 1.00 *(92+,13-,369)
		chr_heart	369	105	0.59 0.96 *(27+,78-,369)	0.64 0.98 *(27+,78-,369)	0.66 0.98 *(27+,78-,369)
		chr_liver	369	105	0.66 0.98 *(76+,29-,369)	0.64 1.00 *(76+,29-,369)	0.68 1.00 *(76+,29-,369)
	dev	dev_body_weight	369	106	0.60 1.00 *(103+,3-,369)	0.66 1.00 *(103+,3-,369)	0.53 1.00 *(103+,3-,369)
		dev_bone	369	106	0.66 1.00 *(62+,44-,369)	0.62 0.97 *(62+,44-,369)	0.67 1.00 *(62+,44-,369)
		dev_clinical_signs	369	106	0.74 1.00 *(77+,29-,369)	0.66 1.00 *(77+,29-,369)	0.73 1.00 *(77+,29-,369)
		dev_food_consumption	369	106	0.65 0.98 *(77+,29-,369)	0.61 0.99 *(77+,29-,369)	0.63 1.00 *(77+,29-,369)
		dev_intestine_small	369	106	0.45 1.00 *(3+,103-,369)	0.65 1.00 *(3+,103-,369)	0.45 1.00 *(3+,103-,369)
		dev_lung	369	106	0.61 1.00 *(9+,97-,369)	0.72 1.00 *(9+,97-,369)	0.61 0.94 *(9+,97-,369)
		dev_maternal_wastage	369	106	0.66 0.98 *(38+,68-,369)	0.63 1.00 *(38+,68-,369)	0.68 0.99 *(38+,68-,369)
		dev_stomach	369	106	0.54 1.00 *(18+,88-,369)	0.62 0.97 *(18+,88-,369)	0.61 0.97 *(18+,88-,369)

	s0	k0	n_chem	act_wt	auc	auc_pval	ba_max	fpr_max	n_neg	n_pos	n_sim_neg	n_sim_pos	sn_max	sp_max	t0_max	cl_n
0	0.8	1	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
1	0.8	2	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
2	0.8	3	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
3	0.8	4	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
4	0.8	5	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
5	0.8	6	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
6	0.8	7	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
7	0.8	8	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
8	0.8	9	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
9	0.7	1	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
10	0.7	2	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
11	0.7	3	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
12	0.7	4	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
13	0.7	5	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
14	0.7	6	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
15	0.7	7	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
16	0.7	8	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
17	0.7	9	2	1	1.000	0.00	1.0	0	4	1	2	0	1	1	1.000000	13
18	0.6	1	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
19	0.6	2	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
20	0.6	3	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
21	0.6	4	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
22	0.6	5	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
23	0.6	6	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
24	0.6	7	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
25	0.6	8	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
26	0.6	9	4	1	1.000	0.00	1.0	0	4	1	4	0	1	1	1.000000	13
27	0.5	1	5	1	0.500	0.00	0.5	0	4	1	4	1	0	1	1.000000	13
28	0.5	2	5	1	0.375	0.18	0.5	0	4	1	4	1	0	1	1.366529	13
29	0.5	3	5	1	0.375	0.22	0.5	0	4	1	4	1	0	1	1.366529	13
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
51	0.3	7	5	1	0.250	0.34	0.5	0	4	1	4	1	0	1	1.334706	13
52	0.3	8	5	1	0.250	0.48	0.5	0	4	1	4	1	0	1	1.334706	13
53	0.3	9	5	1	0.250	0.44	0.5	0	4	1	4	1	0	1	1.334706	13
54	0.2	1	5	1	0.500	0.00	0.5	0	4	1	4	1	0	1	1.000000	13
55	0.2	2	5	1	0.250	0.36	0.5	0	4	1	4	1	0	1	1.366529	13
56	0.2	3	5	1	0.125	0.68	0.5	0	4	1	4	1	0	1	1.295062	13
57	0.2	4	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
58	0.2	5	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
59	0.2	6	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
60	0.2	7	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
61	0.2	8	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
62	0.2	9	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
63	0.1	1	5	1	0.500	0.00	0.5	0	4	1	4	1	0	1	1.000000	13
64	0.1	2	5	1	0.250	0.46	0.5	0	4	1	4	1	0	1	1.366529	13
65	0.1	3	5	1	0.125	0.60	0.5	0	4	1	4	1	0	1	1.295062	13
66	0.1	4	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
67	0.1	5	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
68	0.1	6	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
69	0.1	7	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
70	0.1	8	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
71	0.1	9	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
72	0.0	1	5	1	0.500	0.00	0.5	0	4	1	4	1	0	1	1.000000	13
73	0.0	2	5	1	0.250	0.44	0.5	0	4	1	4	1	0	1	1.366529	13
74	0.0	3	5	1	0.125	0.56	0.5	0	4	1	4	1	0	1	1.295062	13
75	0.0	4	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
76	0.0	5	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
77	0.0	6	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
78	0.0	7	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
79	0.0	8	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13
80	0.0	9	5	1	0.000	NaN	0.5	0	4	1	4	1	0	1	1.253177	13

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
chr	100	9	12	6	7	10	2	18
dev	100	9	19	7	6	16	1	23
dnt	100	19	31	18	6	24	2	32
mgr	100	12	19	8	7	16	1	23
rep	100	0	0	0	0	0	0	0
sac	100	2	5	5	1	5	2	7
sub	100	12	15	8	7	13	2	23
All	100	10	16	7	6	13	2	21

sim_type	n	chm	bio	bc	chm>bio\|bc	bio>chm\|bc	bc>chm\|bio	Any
tox
chr	100	9	12	6	7	10	2	18
dev	100	9	19	7	6	16	1	23
dnt	100	19	31	18	6	24	2	32
mgr	100	12	19	8	7	16	1	23
rep	100	0	0	0	0	0	0	0
sac	100	2	5	5	1	5	2	7
sub	100	12	15	8	7	13	2	23
All	100	10	16	7	6	13	2	21