In [1]:
%load_ext autoreload
%autoreload 2
%load_ext sql 
%pylab inline
%matplotlib inline

import matplotlib.text as text
from IPython.html.widgets import FloatProgress
from IPython.display import display
import scipy.interpolate as interp
import pandas as pd
#from mp.txpepa import *
from bio.data.toxplorer import *
import bio.hts.apredica as apr
from bio.hts.htsdb import *
from bio.data.toxplorer import *
import viz.clust as cv
from chem.clust import *
from sklearn import (manifold, datasets, decomposition, ensemble, lda,
                     random_projection)
from sklearn.metrics.pairwise import euclidean_distances,manhattan_distances
import statsmodels.api as sm
import numpy.linalg as LA
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
stats = importr('stats')
from sklearn.neighbors import KNeighborsClassifier
from genra.readacross import *

mng.register_connection("hts-db","htsdb",username="ishah",
                        password="xxx",host='localhost')
mng.register_connection("txp-db","toxplorerdb",username="ishah",
                        password="xxx",host='localhost')

%sql postgresql://ishah:xxx@localhost/chemicals

DAT_DIR = '/share/home/ishah/projects/Chem/data/tables/'
PKL_DIR = '/share/home/ishah/projects/Chem/data/pickle/'
RES_DIR='/share/home/ishah/projects/Chem/data/results/'
FIG_DIR='/share/home/ishah/projects/Chem/figs/readacross/'

import pickle

tmstmp = time.strftime("%m-%d-%Y",time.localtime())
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['connect', 'copy', 'flatten']
`%matplotlib` prevents importing * from pylab and numpy

Initialization

In [3]:
# Start the parallel machine
from ml.mlearn import *
from ml.readacross import *
import IPython.parallel as PP
%reload_ext autoreload
%autoreload 2

lb_view=None
d_view =None

def initParallel(Data=None):
    RC = PP.Client(profile='galaxy_parallel')
    global lb_view
    global d_view
    d_view = RC[:]
    d_view.block = True
    lb_view = RC.load_balanced_view()
    lb_view.block = True
    d_view.execute("""
%load_ext autoreload
%autoreload 2
import ml.mlearn as ml
from ml.mlearn import *
from ml.readacross import *
from chem.clust import *
    """)
    
    if Data:
        d_view.push(Data)

Load the data from pickles

In [4]:
print "\n".join([i for i in os.listdir(PKL_DIR) if i.startswith('tx-tr')])
[BCc,BCTc,BCTb,Bio,Chm,Tox] = pd.read_pickle(PKL_DIR+'tx-tr-ch-02-08-2016.pkl')
tx-tr-ch-02-08-2016.pkl
tx-tr-ch-11-16-2015.pkl
tx-tr-ch-02-12-2015.pkl
In [5]:
#print "\n".join(os.listdir(PKL_DIR))
[A0,C0,C1,B1] = pd.read_pickle(PKL_DIR+'chm-bio-02-12-2015.pkl')
MOLS = pd.read_pickle(PKL_DIR+'mols-02-12-2015.pkl')
[C2M,M2C,Cl_st,C2] = pd.read_pickle(PKL_DIR+'clust-02-12-2015.pkl')
C2 = C1.set_index('ID')
In [6]:
# Chemical fingerprints

from rdkit import Chem
from rdkit.DataStructs import *
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import AllChem


FP1 = pd.DataFrame([np.array(AllChem.GetMorganFingerprintAsBitVect(i,3,1024)) for i in MOLS.values()])
FP1.index=MOLS.keys()
FP1.columns = ['mrgn_%d'%i for i in FP1.columns]

FP2 = pd.DataFrame([np.array(MACCSkeys.FingerprintMol(i)) for i in MOLS.values()])
FP2.index=MOLS.keys()
FP2.columns = ['mccs_%d'%i for i in FP2.columns]

FP3 = pd.DataFrame([np.array(AllChem.GetHashedTopologicalTorsionFingerprintAsBitVect(i)) for i in MOLS.values()])
FP3.index=MOLS.keys()
FP3.columns = ['tptr_%d'%i for i in FP3.columns]

FP0 = pd.merge(FP1,FP2,left_index=True,right_index=True)
FP0 = pd.merge(FP0,FP3,left_index=True,right_index=True)
FP0.index.names=['ID']
In [7]:
#Distance calculations

# Distance calculations
# make a binary rep
X = BCTb[Bio]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_bio = pd.DataFrame(squareform(pdist(X,'jaccard')),
                     columns=CID,index=CID)
S_bio = 1-D_bio
In [8]:
S_bio.shape
Out[8]:
(8642, 8642)
In [9]:
X = BCTb[Chm]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_chm = pd.DataFrame(squareform(pdist(X,'jaccard')),
                     columns=CID,index=CID)
S_chm = 1-D_chm
In [10]:
X = BCTb[Bio+Chm]
CID = X.index
X = X.fillna(0)
X[X!=0]=1
D_bc = pd.DataFrame(squareform(pdist(X,'jaccard')),
                    columns=CID,index=CID)
S_bc = 1-D_bc

S_bc.shape
Out[10]:
(8642, 8642)
In [11]:
ChmNm1 = C1[['ID','chemical_name']]
ChmNm1.set_index('ID',inplace=True)
STUDIES = ['mgr', 'chr', 'sac', 'sub', 'dev', 'rep', 'oth', 'acu', 'neu', 'dnt']
In [14]:
#pd.merge(C2.ix[CID],BCTb.ix[CID,STUDIES],left_index=True,right_index=True)

Search chemicals by clusterid

In [ ]:
R= C1.ix[C1.chemical_name.apply(lambda i: i.lower().find("propiconazole")>-1)]
print R
C2M['C94361065']
In [21]:
len(Tox)
Out[21]:
574
In [29]:
#BCTb0 = BCTb.copy()
BCTb[BCTb>0]=1

Nearest neighbour viz </h3>

In [15]:
CD = ChemDrawing()
In [35]:
cl = 5
CID = M2C[cl]

T0 = BCTb.ix[CID,Tox]
Y = T0.apply(lambda y: np.sum(y==0)>1 and np.sum(y>0)>0,axis=0)
Tox1 = Y[Y].index

print "> CL ",cl,'n=',len(CID),'Tox=',len(Tox1),time.strftime("%H:%M",time.localtime())
sys.stdout.flush()
sys.stderr.flush()

X_cl=BCTb.ix[CID,Tox1]
S_chm_cl=S_chm.ix[CID,:]
S_bio_cl=S_bio.ix[CID,:]
S_bc_cl=S_bc.ix[CID,:]

PERF = []
for t0 in Tox1:
    print '   ', t0
    #P = ClActivityFromNN(t0,X_cl,S_chm_cl,sim_type='chm',ret='perf',
    #                     k0=3,s0=0.2,perm=0,wt=True)
    P = GenRAPerf(t0,X_cl,S_bio_cl,sim_type='bio',ret='perf',
                         k0=3,s0=0.2,perm=0,wt=True)
    
    PERF.append(P)
> CL  5 n= 29 Tox= 65 11:05
    chr_bone_marrow
    chr_clinical_chemistry
    chr_clinical_signs
    chr_eye
    chr_food_consumption
    chr_heart
    chr_hematology
    chr_kidney
    chr_mortality
    chr_nerve
    chr_ovary
    chr_parathyroid_gland
    chr_pituitary_gland
    chr_spleen
    chr_testes
    chr_thyroid_gland
    chr_uncertain_primary_site
    chr_urinalysis
    chr_uterus
    dev_bone
    dev_clinical_chemistry
    dev_clinical_signs
    dev_food_consumption
    dev_maternal_wastage
    dev_mortality
    dev_reproductive_performance
    dev_spleen
    mgr_adrenal_gland
    mgr_brain
    mgr_clinical_chemistry
    mgr_clinical_signs
    mgr_epididymis
    mgr_food_consumption
    mgr_heart
    mgr_kidney
    mgr_liver
    mgr_mortality
    mgr_offspring_survival-early
    mgr_offspring_survival-late
    mgr_pituitary_gland
    mgr_reproductive_performance
    mgr_sexual_developmental_landmark
    mgr_spleen
    mgr_testes
    sub_body_weight
    sub_bone_marrow
    sub_brain
    sub_clinical_signs
    sub_estrous_cycle
    sub_eye
    sub_food_consumption
    sub_harderian_gland
    sub_heart
    sub_kidney
    sub_lung
    sub_mortality
    sub_other
    sub_ovary
    sub_salivary_glands
    sub_spleen
    sub_stomach
    sub_testes
    sub_thymus
    sub_thyroid_gland
    sub_urinalysis
In [40]:
PERF_df = pd.DataFrame(PERF)
PERF_df.t0_max.hist(bins=20)
Out[40]:
<matplotlib.axes.AxesSubplot at 0xa595a90>
In [42]:
d_view.execute("""
%load_ext autoreload
%autoreload 2
import ml.mlearn as ml
from ml.mlearn import *
from chem.ra.readacross import *
import chem.ra.readacross as genra
from chem.clust import *
""")
from chem.ra.readacross import *
import chem.ra.readacross as genra

RA prediction of toxicity -- binary

In [43]:
from chem.ra.readacross import *
P_ALL = []
print "Starting GenRA Analysis to assign activity " + time.strftime("%d/%m/%Y %H:%M",time.localtime())

initParallel()

from IPython.html.widgets import FloatProgress
from IPython.display import display

N = len(M2C)
pc = int(N/100)

pbar = FloatProgress(min=0, max=100,description='GenRA Perf')
display(pbar)

i_cl =0
for cl,CID in M2C.iteritems():
    i_cl+=1
    if (i_cl % pc) == 0: 
        pbar.value += 1
    
    if len(CID)<2: continue
    #Tox1=STUDIES1[np.where(BCTb.ix[CID,STUDIES1].notnull().sum()>0)]
    T0 = BCTb.ix[CID,Tox]
    Y = T0.apply(lambda y: np.sum(y==0)>0 and np.sum(y>0)>0,axis=0)
    Tox1 = Y[Y].index

    if len(Tox1)<1: continue
    print "> CL ",cl,'n=',len(CID),'Tox=',len(Tox1),time.strftime("%H:%M",time.localtime())
    sys.stdout.flush()
    sys.stderr.flush()
    
    X_cl=BCTb.ix[CID,Tox1]
    S_chm_cl=S_chm.ix[CID,:]
    S_bio_cl=S_bio.ix[CID,:]
    S_bc_cl=S_bc.ix[CID,:]
    #k0 = len(CID)
    
    print "  Broadcasting data .."
    d_view.push(dict(X_cl=X_cl,S_bio_cl=S_bio_cl,S_chm_cl=S_chm_cl,S_bc_cl=S_bc_cl,Tox1=Tox1))
    
    SK0 = []
    L  = ifthen(len(CID)<10, len(CID), 10)
    for ba in Tox1:
        for s0 in np.linspace(1,0,num=11)[1:]:
            for k0 in range(1,L):
                SK0.append((ba,s0,k0))
            
    
    print "  Chm %d" % len(SK0)
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_chm_cl,sim_type='chm',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)
    
    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)


    print "  Bio" 
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_bio_cl,sim_type='bio',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)

    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)

    print "  BC"
    P = lb_view.map(lambda (ba_i,s0_i,k0_i): GenRAPerf(ba_i,X_cl,S_bc_cl,sim_type='bc',ret='perf',
                                                          k0=k0_i,s0=s0_i,perm=50,wt=True),
                    SK0)
    P_df = pd.DataFrame([i for i in P if len(i)])
    P_df['cl']=cl
    P_ALL = concat_df(P_ALL,P_df)
    
P_ALL.to_csv(RES_DIR+ time.strftime("genra-pred-tox-cl-nn-%Y-%m-%d-%H%M.csv",time.localtime()),index=False)

pbar.value = 100
#send_email(txt="Done",subj="ActivityFromNN bio")
Starting GenRA Analysis to assign activity 02/03/2016 11:10
> CL  0 n= 13 Tox= 67 11:10
  Broadcasting data ..
  Chm 6030
  Bio
  BC
> CL  1 n= 47 Tox= 122 11:12
  Broadcasting data ..
  Chm 10980
  Bio
  BC
> CL  2 n= 12 Tox= 22 11:15
  Broadcasting data ..
  Chm 1980
  Bio
  BC
> CL  3 n= 22 Tox= 21 11:15
  Broadcasting data ..
  Chm 1890
  Bio
  BC
> CL  4 n= 6 Tox= 22 11:16
  Broadcasting data ..
  Chm 1100
  Bio
  BC
> CL  5 n= 29 Tox= 78 11:16
  Broadcasting data ..
  Chm 7020
  Bio
  BC
> CL  6 n= 23 Tox= 88 11:18
  Broadcasting data ..
  Chm 7920
  Bio
  BC
> CL  7 n= 34 Tox= 93 11:20
  Broadcasting data ..
  Chm 8370
  Bio
  BC
> CL  10 n= 35 Tox= 75 11:22
  Broadcasting data ..
  Chm 6750
  Bio
  BC
> CL  11 n= 22 Tox= 102 11:25
  Broadcasting data ..
  Chm 9180
  Bio
  BC
> CL  12 n= 13 Tox= 91 11:27
  Broadcasting data ..
  Chm 8190
  Bio
  BC
> CL  13 n= 2 Tox= 48 11:30
  Broadcasting data ..
  Chm 480
  Bio
  BC
> CL  16 n= 12 Tox= 72 11:30
  Broadcasting data ..
  Chm 6480
  Bio
  BC
> CL  17 n= 30 Tox= 12 11:32
  Broadcasting data ..
  Chm 1080
  Bio
  BC
> CL  19 n= 117 Tox= 165 11:32
  Broadcasting data ..
  Chm 14850
  Bio
  BC
> CL  20 n= 4 Tox= 45 11:37
  Broadcasting data ..
  Chm 1350
  Bio
  BC
> CL  21 n= 51 Tox= 107 11:38
  Broadcasting data ..
  Chm 9630
  Bio
  BC
> CL  23 n= 16 Tox= 107 11:41
  Broadcasting data ..
  Chm 9630
  Bio
  BC
> CL  24 n= 35 Tox= 83 11:44
  Broadcasting data ..
  Chm 7470
  Bio
  BC
> CL  25 n= 62 Tox= 96 11:46
  Broadcasting data ..
  Chm 8640
  Bio
  BC
> CL  27 n= 2 Tox= 39 11:49
  Broadcasting data ..
  Chm 390
  Bio
  BC
> CL  28 n= 25 Tox= 57 11:49
  Broadcasting data ..
  Chm 5130
  Bio
  BC
> CL  29 n= 26 Tox= 92 11:50
  Broadcasting data ..
  Chm 8280
  Bio
  BC
> CL  31 n= 80 Tox= 164 11:53
  Broadcasting data ..
  Chm 14760
  Bio
  BC
> CL  33 n= 9 Tox= 89 11:58
  Broadcasting data ..
  Chm 7120
  Bio
  BC
> CL  34 n= 3 Tox= 30 12:00
  Broadcasting data ..
  Chm 600
  Bio
  BC
> CL  35 n= 369 Tox= 341 12:00
  Broadcasting data ..
  Chm 30690
  Bio
  BC
> CL  36 n= 23 Tox= 3 12:24
  Broadcasting data ..
  Chm 270
  Bio
  BC
> CL  37 n= 4 Tox= 66 12:24
  Broadcasting data ..
  Chm 1980
  Bio
  BC
> CL  41 n= 4 Tox= 2 12:24
  Broadcasting data ..
  Chm 60
  Bio
  BC
> CL  42 n= 19 Tox= 52 12:24
  Broadcasting data ..
  Chm 4680
  Bio
  BC
> CL  44 n= 5 Tox= 29 12:25
  Broadcasting data ..
  Chm 1160
  Bio
  BC
> CL  46 n= 16 Tox= 29 12:26
  Broadcasting data ..
  Chm 2610
  Bio
  BC
> CL  48 n= 2 Tox= 42 12:26
  Broadcasting data ..
  Chm 420
  Bio
  BC
> CL  49 n= 5 Tox= 21 12:26
  Broadcasting data ..
  Chm 840
  Bio
  BC
> CL  52 n= 4 Tox= 13 12:26
  Broadcasting data ..
  Chm 390
  Bio
  BC
> CL  53 n= 5 Tox= 46 12:27
  Broadcasting data ..
  Chm 1840
  Bio
  BC
> CL  55 n= 25 Tox= 131 12:27
  Broadcasting data ..
  Chm 11790
  Bio
  BC
> CL  56 n= 6 Tox= 58 12:31
  Broadcasting data ..
  Chm 2900
  Bio
  BC
> CL  58 n= 2 Tox= 26 12:32
  Broadcasting data ..
  Chm 260
  Bio
  BC
> CL  59 n= 37 Tox= 114 12:32
  Broadcasting data ..
  Chm 10260
  Bio
  BC
> CL  60 n= 3 Tox= 24 12:35
  Broadcasting data ..
  Chm 480
  Bio
  BC
> CL  61 n= 4 Tox= 24 12:35
  Broadcasting data ..
  Chm 720
  Bio
  BC
> CL  63 n= 4 Tox= 4 12:36
  Broadcasting data ..
  Chm 120
  Bio
  BC
> CL  67 n= 67 Tox= 131 12:36
  Broadcasting data ..
  Chm 11790
  Bio
  BC
> CL  68 n= 7 Tox= 14 12:39
  Broadcasting data ..
  Chm 840
  Bio
  BC
> CL  70 n= 3 Tox= 43 12:40
  Broadcasting data ..
  Chm 860
  Bio
  BC
> CL  74 n= 51 Tox= 75 12:40
  Broadcasting data ..
  Chm 6750
  Bio
  BC
> CL  75 n= 34 Tox= 145 12:42
  Broadcasting data ..
  Chm 13050
  Bio
  BC
> CL  76 n= 18 Tox= 115 12:46
  Broadcasting data ..
  Chm 10350
  Bio
  BC
> CL  77 n= 15 Tox= 87 12:50
  Broadcasting data ..
  Chm 7830
  Bio
  BC
> CL  78 n= 6 Tox= 63 12:52
  Broadcasting data ..
  Chm 3150
  Bio
  BC
> CL  79 n= 13 Tox= 98 12:53
  Broadcasting data ..
  Chm 8820
  Bio
  BC
> CL  80 n= 41 Tox= 149 12:55
  Broadcasting data ..
  Chm 13410
  Bio
  BC
> CL  83 n= 3 Tox= 11 13:00
  Broadcasting data ..
  Chm 220
  Bio
  BC
> CL  85 n= 7 Tox= 77 13:00
  Broadcasting data ..
  Chm 4620
  Bio
  BC
> CL  86 n= 13 Tox= 1 13:02
  Broadcasting data ..
  Chm 90
  Bio
  BC
> CL  88 n= 34 Tox= 112 13:02
  Broadcasting data ..
  Chm 10080
  Bio
  BC
> CL  96 n= 13 Tox= 81 13:05
  Broadcasting data ..
  Chm 7290
  Bio
  BC
> CL  97 n= 8 Tox= 83 13:07
  Broadcasting data ..
  Chm 5810
  Bio
  BC
> CL  98 n= 32 Tox= 18 13:08
  Broadcasting data ..
  Chm 1620
  Bio
  BC
In [11]:
P_ALL = pd.read_csv(RES_DIR+'genra-pred-tox-cl-nn-2016-03-02-1309.csv')
In [12]:
P_ALL.t0_max.hist(bins=20)
Out[12]:
<matplotlib.axes.AxesSubplot at 0xf912b50>
In [13]:
N = dict(((i,len(v)) for i,v in M2C.iteritems()))

P_ALL['cl_n'] = P_ALL.cl.apply(lambda i: N[i])
P_ALL['n_chem']=P_ALL.n_sim_pos+P_ALL.n_sim_neg
P_ALL[(P_ALL['n_chem']>1)].shape
Out[13]:
(517379, 19)
In [14]:
P_ALL1 = P_ALL.set_index(['cl','sim_type','effect','s0','k0','n_chem'])
In [52]:
# How many cluster analyzed ? 
len(set([i[0] for i in P_ALL1.index]))
Out[52]:
61

Get a summary of all datasets

In [201]:
def mksig(p):
    y = ''
    if isfinite(p):
        if p<=0.1: 
            y='*'
        elif p<=0.01:
            y='**'
    
    return y
        

G=P_ALL1.reset_index().groupby(['cl','cl_n','sim_type','effect'])

PS0 = G.aggregate(dict(auc=np.max,auc_pval=np.min,ba_max=np.max,t0_max=np.median,n_neg=np.max,n_pos=np.max))
#P = Summary.select(lambda i:i[0]==2 and i[1]=='chm')
#PS0['res1'] = PS0.apply(lambda i: "%3.2f/%3.2f" % (i[2],i[4]),axis=1)
#PS0['res2'] = PS0.apply(lambda i: "%3.2f/%3.2f (%d+,%d-)" % (i[2],i[4],i[5],i[0]),axis=1)
PS0['res1'] = PS0.apply(lambda i: "%3.2f %s (%d+,%d-)" % (i[2],mksig(i[4]),i[5],i[0]),axis=1)
PS0.res1[PS0.index[pd.isnull(PS0.auc)]]=''

#Summary = Summary[(Summary['auc_pval']>0)]
PS0 = PS0.reset_index()
PS0['tox']=PS0.effect.apply(lambda i: i.split('_')[0])
PS0['effect'] = PS0.effect.apply(lambda i: i.replace('[not_in_list]','other'))
In [192]:
PS0.ix[:10]
Out[192]:
cl cl_n sim_type effect n_neg t0_max auc ba_max auc_pval n_pos res1 res2 res3 tox
0 0 13 bc chr_other 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
1 0 13 bc chr_adrenal_gland 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
2 0 13 bc chr_clinical_chemistry 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
3 0 13 bc chr_clinical_signs 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
4 0 13 bc chr_heart 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
5 0 13 bc chr_hematology 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
6 0 13 bc chr_kidney 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
7 0 13 bc chr_lung 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
8 0 13 bc chr_mammary_gland 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
9 0 13 bc chr_other 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr
10 0 13 bc chr_ovary 1 2 0 0.5 NaN 1 0.00/nan 0.00/nan (1+,1-) 0.00 (1+,1-) chr

Measure Performance across neighbourhood as volume under surface

In [167]:
list(P_ALL1.index)[:10]
Out[167]:
[(0, 'chm', 'chr_adrenal_gland', 0.3, 1, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 2, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 3, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 4, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 5, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 6, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 7, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 8, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.3, 9, 2),
 (0, 'chm', 'chr_adrenal_gland', 0.2, 1, 2)]
In [119]:
P_ALL['tox']    = P_ALL.effect.apply(lambda i: i.split('_')[0])
P_ALL['effect'] = P_ALL.effect.apply(lambda i: i.replace('[not_in_list]','other'))
Out[119]:
act_wt auc auc_pval ba_max cl effect fpr_max k0 n_neg n_pos n_sim_neg n_sim_pos s0 sim_type sn_max sp_max t0_max cl_n n_chem tox
0 1 0 NaN 0.5 0 chr_adrenal_gland 0 1 1 1 1 1 0.3 chm 0 1 2 13 2 chr
1 1 0 NaN 0.5 0 chr_adrenal_gland 0 2 1 1 1 1 0.3 chm 0 1 2 13 2 chr
2 1 0 NaN 0.5 0 chr_adrenal_gland 0 3 1 1 1 1 0.3 chm 0 1 2 13 2 chr
3 1 0 NaN 0.5 0 chr_adrenal_gland 0 4 1 1 1 1 0.3 chm 0 1 2 13 2 chr
4 1 0 NaN 0.5 0 chr_adrenal_gland 0 5 1 1 1 1 0.3 chm 0 1 2 13 2 chr
5 1 0 NaN 0.5 0 chr_adrenal_gland 0 6 1 1 1 1 0.3 chm 0 1 2 13 2 chr
6 1 0 NaN 0.5 0 chr_adrenal_gland 0 7 1 1 1 1 0.3 chm 0 1 2 13 2 chr
7 1 0 NaN 0.5 0 chr_adrenal_gland 0 8 1 1 1 1 0.3 chm 0 1 2 13 2 chr
8 1 0 NaN 0.5 0 chr_adrenal_gland 0 9 1 1 1 1 0.3 chm 0 1 2 13 2 chr
9 1 0 NaN 0.5 0 chr_adrenal_gland 0 1 1 1 1 1 0.2 chm 0 1 2 13 2 chr
10 1 0 NaN 0.5 0 chr_adrenal_gland 0 2 1 1 1 1 0.2 chm 0 1 2 13 2 chr
In [168]:
from scipy.interpolate import *
from IPython.html.widgets import FloatProgress
from IPython.display import display

def smoothData(xi,yi,zi,out='ij'):
    x1,y1=np.mgrid[xi.min():xi.max():50j,yi.min():yi.max():50j]
    f = bisplrep(xi,yi,zi,s=0.5)
    z1= bisplev(x1[:,0],y1[0,:],f)
    
    if out =='ij':
        return np.concatenate(x1),np.concatenate(y1),np.concatenate(z1)
    else:
        return x1,y1,z1

X_vol = []

Work = list(P_ALL1.reset_index()[['cl','sim_type','effect']].drop_duplicates().to_records(index=False))
N = len(Work)
pc = int(N/100)

pbar = FloatProgress(min=0, max=100,description='GenRA Perf Vol')
display(pbar)

i_cl=0
for cl,sim_type,tox in Work:
    if (i_cl % pc) == 0: 
        pbar.value += 1
    i_cl+=1
    
    for ft in ['chm','bio','bc']:
        X = P_ALL1.xs((cl,ft,tox)).reset_index()
        if (X.auc>0).sum()==0: continue
        try:
            x2,y2,z2 = smoothData(X.n_chem, X.s0, X.auc,out='xy')
            z2[z2>1]=1
            v1 = z2.sum().sum()
            v2 = z2[z2>0.5].sum().sum()
            (x,y) = z2.shape
            vf1 = v1/(x*y)
            vf2 = v2/(x*y)
        except:
            pass
        else:
            X_vol.append(dict(cl=cl,tox=tox,sim_type=ft,auc_vol1=v1,auc_vol2=v2,auc_volf1=vf1,auc_volf2=vf2))

if len(X_vol)>0:
    PERF_vol=pd.pivot_table(pd.DataFrame(X_vol),
                            index=['cl','tox'],columns='sim_type',values='auc_volf1')
    PERF_vol[PERF_vol>1]=1

pbar.value=100
In [204]:
X=P_ALL1.xs((cl,ft)).reset_index()
X.effect.unique()
#PERF_vol=pd.pivot_table(pd.DataFrame(X_vol),
#                        index=['cl','tox'],columns='sim_type',values='auc_volf1')
#PERF_vol[PERF_vol>1]=1
In [218]:
V0 = pd.DataFrame(X_vol)
V0.rename(columns=dict(tox='effect'),inplace=True)
V0['tox']=V0.effect.apply(lambda i: i.split('_')[0])
V0['effect'] = V0.effect.apply(lambda i: i.replace('[not_in_list]','other'))
PERF_vol=pd.pivot_table(V0,index=['cl','tox','effect'],columns='sim_type',values='auc_volf1')
#PERF_vol[['auc_volf1']].ix[:10]
Out[218]:
auc_vol1 auc_vol2 auc_volf1 auc_volf2 cl sim_type effect tox
0 100.040355 0.000000 0.040016 0.000000 0 chm dev_bone dev
1 131.911862 0.000000 0.052765 0.000000 0 bio dev_bone dev
2 40.673157 0.000000 0.016269 0.000000 0 bc dev_bone dev
3 100.040355 0.000000 0.040016 0.000000 0 chm dev_clinical_signs dev
4 131.911862 0.000000 0.052765 0.000000 0 bio dev_clinical_signs dev
5 40.673157 0.000000 0.016269 0.000000 0 bc dev_clinical_signs dev
6 366.058613 151.554364 0.146423 0.060622 0 chm dev_food_consumption dev
7 118.720676 0.000000 0.047488 0.000000 0 bio dev_food_consumption dev
8 57.737806 0.000000 0.023095 0.000000 0 bc dev_food_consumption dev
9 514.791060 167.875690 0.205916 0.067150 0 chm dev_maternal_wastage dev
10 500.609270 235.457789 0.200244 0.094183 0 bio dev_maternal_wastage dev

Merge ROC/AUC and VUS

In [262]:
"""
PERF0 = pd.merge(PS0,V0,left_on=['cl','tox','effect','sim_type'],right_on=['cl','tox','effect','sim_type'],how='outer')
PERF0.auc_volf1.fillna(0,inplace=True)
# concatentation: VUS_volf1, ROC AUC, pval, n+,n-
#PERF0['res2'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-)" % (i[2],mksig(i[4]),i[5],i[0]),axis=1)
PERF0['res2'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-,%d)" % (i['auc_volf1'],i['auc'],mksig(i['auc_pval']),i['n_pos'],i['n_neg'],i['cl_n']),axis=1)
PERF0['n_perf'] = PERF0.n_pos+PERF0.n_neg
#PERF0.res2[PERF0.index[pd.isnull(PERF0.VUS_vol)]]=''

PERF1 = pd.pivot_table(PERF0,index=['cl','tox','effect','cl_n','n_perf'],columns='sim_type',values=['auc_volf1','auc','auc_pval','res2'],
                       aggfunc=lambda i: np.unique(i)[0])

"""
Out[262]:
'\nPERF0 = pd.merge(PS0,V0,left_on=[\'cl\',\'tox\',\'effect\',\'sim_type\'],right_on=[\'cl\',\'tox\',\'effect\',\'sim_type\'],how=\'outer\')\nPERF0.auc_volf1.fillna(0,inplace=True)\n# concatentation: VUS_volf1, ROC AUC, pval, n+,n-\n#PERF0[\'res2\'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-)" % (i[2],mksig(i[4]),i[5],i[0]),axis=1)\nPERF0[\'res2\'] = PERF0.apply(lambda i: "%3.2f %3.2f %s(%d+,%d-,%d)" % (i[\'auc_volf1\'],i[\'auc\'],mksig(i[\'auc_pval\']),i[\'n_pos\'],i[\'n_neg\'],i[\'cl_n\']),axis=1)\nPERF0[\'n_perf\'] = PERF0.n_pos+PERF0.n_neg\n#PERF0.res2[PERF0.index[pd.isnull(PERF0.VUS_vol)]]=\'\'\n\nPERF1 = pd.pivot_table(PERF0,index=[\'cl\',\'tox\',\'effect\',\'cl_n\',\'n_perf\'],columns=\'sim_type\',values=[\'auc_volf1\',\'auc\',\'auc_pval\',\'res2\'],\n                       aggfunc=lambda i: np.unique(i)[0])\n\n'
In [181]:
x=[0,13,22,6,29,12]
P1_vol = PERF_vol.select(lambda x: x[0] in x)
P1_vol.ix[np.where((P1_vol>0.75).sum(axis=1)>0)]
Out[181]:
sim_type bc bio chm
cl tox effect
5 chr chr_body_weight 0.617104 0.082518 0.769226
chr_ovary 0.617104 0.082518 0.769226
chr_parathyroid_gland 0.617104 0.082518 0.769226
chr_pituitary_gland 0.617104 0.082518 0.769226
chr_thyroid_gland 0.617104 0.082518 0.769226
10 chr chr_clinical_chemistry 0.760124 0.345216 0.768459
chr_food_consumption 0.668174 0.342450 0.753446
chr_kidney 0.755633 0.286922 0.767791
11 chr chr_thymus 0.613761 0.166593 0.820759
12 mgr mgr_age_landmark 0.246632 0.192755 0.756789
mgr_developmental_landmark 0.246632 0.192755 0.756789
mgr_hair_growth 0.246632 0.192755 0.756789
19 chr chr_body_weight 0.625520 0.758349 0.704801
dev dev_food_consumption 0.669297 0.787608 0.476795
dev_ureter 0.762750 0.735700 0.693668
mgr mgr_sexual_developmental_landmark 0.685706 0.808445 0.722415
mgr_water_consumption 0.719722 0.808756 0.465403
sub sub_estrous_cycle 0.345597 0.818368 0.369229
sub_eye 0.597859 0.503498 0.789788
sub_nose 0.345597 0.818368 0.369229
sub_preputial_gland 0.345597 0.818368 0.369229
sub_skeletal_muscle 0.566775 0.761255 0.636861
sub_sperm_measure 0.345597 0.818368 0.369229
sub_tongue 0.574601 0.818328 0.580462
sub_urinalysis 0.665631 0.672596 0.782579
21 dev dev_hematology 0.722963 0.696837 0.787358
sub sub_lymph_node 0.282021 0.782465 0.276336
sub_ovary 0.282021 0.782465 0.276336
sub_thymus 0.282021 0.782465 0.276336
sub_uterus 0.282021 0.782465 0.276336
... ... ... ... ... ...
75 sub sub_liver 0.544417 0.800239 0.294044
sub_spinal_cord 0.544417 0.800239 0.294044
76 chr chr_brain 0.566908 0.513196 0.770841
chr_clinical_chemistry 0.270581 0.755313 0.514739
chr_kidney 0.270581 0.755313 0.514739
dev dev_heart 0.523003 0.312989 0.754124
dev_hematology 0.280936 0.784062 0.471508
dev_limb 0.528813 0.247416 0.779136
dev_mortality 0.280936 0.784062 0.471508
dev_squamosal 0.280936 0.784062 0.471508
sub sub_clinical_signs 0.280936 0.784062 0.471508
sub_eye 0.618427 0.792649 0.764481
sub_heart 0.503664 0.788795 0.673980
sub_skin 0.528813 0.247416 0.779136
80 chr chr_brain 0.560812 0.495356 0.768576
mgr mgr_body_weight 0.657462 0.690634 0.795845
mgr_bone 0.464522 0.283251 0.775799
mgr_intestine_large 0.657462 0.690634 0.795845
mgr_urinary_bladder 0.653739 0.477013 0.788073
sub sub_intestine_small 0.462256 0.182290 0.773625
sub_prostate 0.671432 0.535696 0.760091
sub_spleen 0.671432 0.535696 0.760091
sub_urinalysis 0.716573 0.410989 0.841579
sub_water_consumption 0.671432 0.535696 0.760091
85 dev dev_food_consumption 0.574070 0.485713 0.855638
sub sub_body_weight 0.465754 0.717500 0.818498
sub_hematology 0.511686 0.490570 0.868607
sub_kidney 0.382095 0.415409 0.842941
sub_tissue_nos 0.465754 0.717500 0.818498
sub_urinary_bladder 0.464212 0.317805 0.890014

188 rows × 3 columns

In [103]:
X1=PERF_vol.xs(80)
#tox1=list(X1.ix[X1.index[(X1>0.9).any(axis=1)]].index)
#X1.ix[X1.index[(X1>0.7).any(axis=1)]]
X1.ix[X1.index[(X1>0.75).any(axis=1)]]
tox1=list(X1.ix[X1.index[(X1>0.75).any(axis=1)]].index)
tox1=list(X1.index[(X1>0.75).any(axis=1)])
Out[103]:
['chr_brain',
 'mgr_body_weight',
 'mgr_bone',
 'mgr_intestine_large',
 'mgr_urinary_bladder',
 'sub_intestine_small',
 'sub_prostate',
 'sub_spleen',
 'sub_urinalysis',
 'sub_water_consumption']

Summarize performance for all cluster, descriptors and outcomes

In [249]:
def mkcol(x): 
    y = x[0]
    if len(x)==2 and x[1]!='':
        y += '_'+x[1]
    for i in range(1,len(x)-1):
        if x[i] or x[i]!='':
            y += x[i]
    return y

G=P_ALL.groupby(['cl','tox','effect','sim_type','n_pos','n_neg','cl_n'])
P0=G.aggregate(dict(auc=dict(mn=np.mean,sd=np.std),t0_max=dict(mn=np.mean,sd=np.std),
                    sn_max=dict(mn=np.mean,sd=np.std),sp_max=dict(mn=np.mean,sd=np.std)))
In [250]:
P0['AUC']=P0['auc'].ix[:10].apply(lambda i: "%3.2f(%1.1f)" % (i[0],i[1]),axis=1)
P0['Spec']=P0['sp_max'].ix[:10].apply(lambda i: "%3.2f(%1.1f)" % (i[0],i[1]),axis=1)
P0['Sens']=P0['sn_max'].ix[:10].apply(lambda i: "%3.2f(%1.1f)" % (i[0],i[1]),axis=1)
P0['GenRA0']=P0['t0_max'].ix[:10].apply(lambda i: "%3.2f(%1.1f)" % (i[0],i[1]),axis=1)

#P0[['AUC','Sens','Spec','y0']].ix[:10]
In [256]:
P0[('t0_max')].mn.hist()
P_ALL.t0_max.hist()
Out[256]:
<matplotlib.axes.AxesSubplot at 0x4ec62ad0>
In [261]:
#pl.scatter(P0[('AUC','mn')])
P0.columns
Out[261]:
MultiIndex(levels=[[u'sp_max', u't0_max', u'auc', u'sn_max', u'AUC', u'Spec', u'Sens', u'GenRA0'], [u'mn', u'sd', u'']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7], [0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2]])
In [242]:
VUS=PERF_vol.copy()
VUS[VUS>1]=1
#VUS_sig=VUS.ix[(VUS>0.4).any(axis=1)]
#VUS_sig.ix[:10]
In [62]:
VUS.bc.hist(bins=20)
Out[62]:
<matplotlib.axes.AxesSubplot at 0xf933150>

Performance comparison VUS

In [183]:
B = VUS.copy()
B[B<0.5]=0
B[B>=0.5]=1
VUS_summary= B.copy()
VUS_summary['Any bio|chm|bc'] = (B>0).any(axis=1)
VUS_summary['Only chm'] = B.apply(lambda x: x[1]==0 and x[0]==0 and x[2]==1,axis=1)
VUS_summary['Only bio'] = B.apply(lambda x: x[1]==1 and x[0]==0 and x[2]==0,axis=1)
VUS_summary['Only bc'] = B.apply(lambda x: x[1]==0 and x[0]==1 and x[2]==0,axis=1)
VUS_summary['chm>bio|bc'] = VUS.apply(lambda x: x[2]>=0.5 and x[2]>x[1] and x[2]>x[0],axis=1)
VUS_summary['bio>chm|bc'] = VUS.apply(lambda x: x[1]>=0.5 and x[1]>x[2] and x[1]>x[0],axis=1)
VUS_summary['bc>chm|bio'] = VUS.apply(lambda x: x[0]>=0.5 and x[0]>x[2] and x[0]>x[1],axis=1)
#VUS_summary=VUS_summary.ix[:,3:]
#Just to keep count
VUS_summary['n']=1
VUS_summary=VUS_summary[['n',  u'chm',u'bio',u'bc', u'Any bio|chm|bc', u'Only chm', u'Only bio',
       u'Only bc', u'chm>bio|bc', u'bio>chm|bc', u'bc>chm|bio']]
VUS_summary.sum(axis=0)
Out[183]:
sim_type
n                 3665
chm                924
bio                856
bc                 770
Any bio|chm|bc    1378
Only chm           244
Only bio           224
Only bc             47
chm>bio|bc         567
bio>chm|bc         538
bc>chm|bio         217
dtype: float64
In [186]:
VUS_summary.ix[:2]
#len(set([i[0] for i in VUS_summary.index]))
Out[186]:
sim_type n chm bio bc Any bio|chm|bc Only chm Only bio Only bc chm>bio|bc bio>chm|bc bc>chm|bio
cl tox effect
0 dev dev_bone 1 0 0 0 False False False False False False False
dev_clinical_signs 1 0 0 0 False False False False False False False
In [278]:
VUS_summary.groupby(level=[1]).aggregate(np.sum)
Out[278]:
sim_type n chm bio bc Any bio|chm|bc Only chm Only bio Only bc chm>bio|bc bio>chm|bc bc>chm|bio
tox
chr 1116 300 235 211 423 92 75 11 211 148 44
dev 712 169 182 164 272 41 39 13 104 105 53
dnt 84 23 31 31 33 0 2 2 6 19 8
mgr 668 171 160 150 247 55 26 3 102 94 48
rep 34 0 0 0 0 0 0 0 0 0 0
sac 128 6 17 3 21 3 15 0 4 17 0
sub 923 255 231 211 382 53 67 18 140 155 64
In [279]:
P2.ix[:10]
Out[279]:
n_pos n_neg cl_n auc_mn auc_sd AUC Sens Spec GenRA0 VUS tested_n untested_n
cl tox effect sim_type
0 dev dev_bone bc 2 1 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.016269 3 10
bio 2 1 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.052765 3 10
chm 2 1 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.040016 3 10
dev_clinical_signs bc 2 1 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.016269 3 10
bio 2 1 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.052765 3 10
chm 2 1 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.040016 3 10
dev_food_consumption bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_maternal_wastage bc 1 2 13 0.291667 0.437117 0.29(0.4) 0.25(0.4) 1.00(0.0) 1.24(0.2) 0.056750 3 10
bio 1 2 13 0.527778 0.491999 0.53(0.5) 0.50(0.5) 1.00(0.0) 1.15(0.2) 0.200244 3 10
chm 1 2 13 0.407407 0.445883 0.41(0.4) 0.33(0.5) 1.00(0.0) 1.22(0.2) 0.205916 3 10
dev_mouth_/_jaw bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_offspring_survival-early bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_offspring_survival-late bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_reproductive_performance bc 1 2 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.016269 3 10
bio 1 2 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.052765 3 10
chm 1 2 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.040016 3 10
dev_sexual_developmental_landmark bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_sperm_morphology bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10 sub sub_skeletal_muscle bc 1 8 35 0.702778 0.382072 0.70(0.4) 0.60(0.5) 1.00(0.0) 1.06(0.1) 0.675739 9 26
bio 1 8 35 0.637153 0.378501 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.36(0.4) 0.251533 9 26
chm 1 8 35 0.644676 0.380429 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.06(0.1) 0.570717 9 26
sub_skin bc 1 8 35 0.702778 0.382072 0.70(0.4) 0.60(0.5) 1.00(0.0) 1.06(0.1) 0.675739 9 26
bio 1 8 35 0.637153 0.378501 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.36(0.4) 0.251533 9 26
chm 1 8 35 0.644676 0.380429 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.06(0.1) 0.570717 9 26
sub_spleen bc 1 8 35 0.716667 0.376323 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.04(0.1) 0.647340 9 26
bio 1 8 35 0.677083 0.348626 0.68(0.3) 0.50(0.5) 1.00(0.0) 1.07(0.1) 0.273003 9 26
chm 1 8 35 0.511574 0.377561 0.51(0.4) 0.33(0.5) 1.00(0.0) 1.21(0.2) 0.393840 9 26
sub_stomach bc 2 7 35 0.648413 0.363415 0.65(0.4) 0.57(0.4) 0.99(0.1) 0.98(0.3) 0.508003 9 26
bio 2 7 35 0.738095 0.286224 0.74(0.3) 0.67(0.5) 0.91(0.2) 0.92(0.4) 0.274908 9 26
chm 2 7 35 0.414021 0.349274 0.41(0.3) 0.44(0.5) 0.84(0.3) 1.13(0.6) 0.331652 9 26
sub_testes bc 4 5 35 0.767222 0.265048 0.77(0.3) 0.75(0.3) 0.93(0.1) 0.85(0.3) 0.637498 9 26
bio 4 5 35 0.707639 0.312868 0.71(0.3) 0.73(0.4) 0.86(0.3) 1.05(0.6) 0.257348 9 26
chm 4 5 35 0.664352 0.222846 0.66(0.2) 0.59(0.3) 0.98(0.1) 0.94(0.2) 0.519127 9 26
sub_thymus bc 3 6 35 0.623457 0.401996 0.62(0.4) 0.56(0.4) 0.99(0.0) 1.07(0.3) 0.558693 9 26
bio 3 6 35 0.668981 0.341912 0.67(0.3) 0.63(0.5) 0.91(0.2) 0.97(0.4) 0.245487 9 26
chm 3 6 35 0.500000 0.337500 0.50(0.3) 0.46(0.4) 0.91(0.2) 1.11(0.4) 0.440463 9 26
sub_trachea bc 1 8 35 0.716667 0.376323 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.04(0.1) 0.647340 9 26
bio 1 8 35 0.677083 0.348626 0.68(0.3) 0.50(0.5) 1.00(0.0) 1.07(0.1) 0.273003 9 26
chm 1 8 35 0.511574 0.377561 0.51(0.4) 0.33(0.5) 1.00(0.0) 1.21(0.2) 0.393840 9 26
sub_urinalysis bc 1 8 35 0.719444 0.359930 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.09(0.1) 0.681676 9 26
bio 1 8 35 0.619792 0.404046 0.62(0.4) 0.50(0.5) 1.00(0.0) 1.13(0.2) 0.252956 9 26
chm 1 8 35 0.780093 0.332661 0.78(0.3) 0.67(0.5) 1.00(0.0) 1.05(0.1) 0.668352 9 26
sub_uterus bc 2 7 35 0.873016 0.217849 0.87(0.2) 0.93(0.2) 0.91(0.2) 0.78(0.4) 0.740531 9 26
bio 2 7 35 0.760913 0.297195 0.76(0.3) 0.92(0.3) 0.80(0.3) 0.73(0.5) 0.315979 9 26
chm 2 7 35 0.889550 0.172507 0.89(0.2) 0.84(0.2) 0.97(0.1) 0.77(0.4) 0.670975 9 26
sub_vagina bc 1 8 35 0.719444 0.359930 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.09(0.1) 0.681676 9 26
bio 1 8 35 0.619792 0.404046 0.62(0.4) 0.50(0.5) 1.00(0.0) 1.13(0.2) 0.252956 9 26
chm 1 8 35 0.780093 0.332661 0.78(0.3) 0.67(0.5) 1.00(0.0) 1.05(0.1) 0.668352 9 26

1467 rows × 12 columns

Performance comparison AUC

In [283]:
PERF_auc=pd.pivot_table(P2.reset_index(),index=['cl','tox','effect'],columns='sim_type',values='auc_mn')
In [367]:
PERF_auc.ix[:3]
Out[367]:
sim_type bc bio chm
cl tox effect
0 dev dev_bone 0.062500 0.138889 0.083333
dev_clinical_signs 0.062500 0.138889 0.083333
dev_food_consumption 0.083333 0.125000 0.138889
dev_maternal_wastage 0.291667 0.527778 0.407407
dev_mouth_/_jaw 0.083333 0.125000 0.138889
dev_offspring_survival-early 0.083333 0.125000 0.138889
dev_offspring_survival-late 0.083333 0.125000 0.138889
dev_reproductive_performance 0.062500 0.138889 0.083333
dev_sexual_developmental_landmark 0.083333 0.125000 0.138889
dev_sperm_morphology 0.083333 0.125000 0.138889
mgr mgr_adrenal_gland 0.312500 0.240741 0.506173
mgr_age_landmark 0.041667 0.123457 0.148148
mgr_brain 0.083333 0.388889 0.317901
mgr_clinical_signs 0.157407 0.185185 0.436214
mgr_developmental_landmark 0.041667 0.123457 0.148148
mgr_food_consumption 0.312500 0.240741 0.506173
mgr_kidney 0.041667 0.123457 0.148148
mgr_liver 0.041667 0.123457 0.148148
mgr_motor_activity 0.342593 0.197531 0.633745
mgr_offspring_survival-early 0.041667 0.123457 0.148148
mgr_offspring_survival-late 0.222222 0.703704 0.207819
mgr_pituitary_gland 0.157407 0.185185 0.436214
mgr_reproductive_performance 0.041667 0.123457 0.148148
mgr_seminal_vesicle 0.222222 0.703704 0.207819
mgr_sexual_developmental_landmark 0.041667 0.123457 0.148148
mgr_sperm_measure 0.222222 0.703704 0.207819
mgr_spleen 0.222222 0.703704 0.207819
mgr_testes 0.157407 0.185185 0.436214
sub sub_body_weight 0.439815 0.339506 0.798354
sub_brain 0.163194 0.236111 0.405864
... ... ... ... ... ...
1 mgr mgr_reproductive_performance 0.134259 0.194444 0.180556
mgr_sexual_developmental_landmark 0.435185 0.722222 0.395833
mgr_spleen 0.327160 0.688272 0.319444
mgr_stomach 0.435185 0.722222 0.395833
mgr_thyroid_gland 0.435185 0.722222 0.395833
sub sub_adrenal_gland 0.000000 0.179012 0.123457
sub_bone_marrow 0.111111 0.469136 0.395062
sub_clinical_signs 0.000000 0.179012 0.123457
sub_estrous_cycle 0.055556 0.228395 0.395062
sub_food_consumption 0.055556 0.228395 0.395062
sub_heart 0.000000 0.179012 0.123457
sub_hematology 0.055556 0.703704 0.123457
sub_kidney 0.055556 0.703704 0.123457
sub_lung 0.000000 0.179012 0.123457
sub_mortality 0.000000 0.179012 0.123457
sub_pancreas 0.055556 0.703704 0.123457
sub_pituitary_gland 0.000000 0.179012 0.123457
sub_prostate 0.000000 0.179012 0.123457
sub_sperm_measure 0.055556 0.228395 0.395062
sub_spleen 0.083333 0.398148 0.259259
sub_testes 0.055556 0.703704 0.123457
sub_thyroid_gland 0.055556 0.703704 0.123457
sub_urinalysis 0.111111 0.469136 0.395062
sub_urinary_bladder 0.055556 0.703704 0.123457
sub_uterus 0.000000 0.179012 0.123457
3 sub sub_body_weight 0.161111 0.000000 0.175926
sub_bone_marrow 0.150000 0.500000 0.125000
sub_clinical_signs 0.622222 0.250000 0.564815
sub_hematology 0.150000 0.500000 0.125000
sub_kidney 0.622222 0.250000 0.564815

177 rows × 3 columns

In [398]:
B = PERF_auc.copy()
auc0=0.75
B[B<auc0]=0
B[B>=auc0]=1
AUC_summary= B.copy()
#AUC_summary['Only chm'] = B.apply(lambda x: x[1]==0 and x[0]==0 and x[2]==1,axis=1)
#AUC_summary['Only bio'] = B.apply(lambda x: x[1]==1 and x[0]==0 and x[2]==0,axis=1)
#AUC_summary['Only bc'] = B.apply(lambda x: x[1]==0 and x[0]==1 and x[2]==0,axis=1)
AUC_summary['chm>bio|bc'] = PERF_auc.apply(lambda x: x[2]>=auc0 and x[2]>x[1] and x[2]>x[0],axis=1)
AUC_summary['bio>chm|bc'] = PERF_auc.apply(lambda x: x[1]>=auc0 and x[1]>x[2] and x[1]>x[0],axis=1)
AUC_summary['bc>chm|bio'] = PERF_auc.apply(lambda x: x[0]>=auc0 and x[0]>x[2] and x[0]>x[1],axis=1)
AUC_summary['Any'] = (B>0).any(axis=1)
#VUS_summary=VUS_summary.ix[:,3:]
#Just to keep count
AUC_summary['n']=1
AUC_summary=AUC_summary[['n',  u'chm',u'bio',u'bc', 
                         #u'Only chm', u'Only bio',u'Only bc', 
                         u'chm>bio|bc', u'bio>chm|bc', u'bc>chm|bio',
                         'Any']]
In [403]:
AUC_RES = AUC_summary.groupby(level=[1]).aggregate(np.sum)
AUC_RES.ix['All'] = AUC_summary.sum(axis=0)
AUC_RES
Out[403]:
sim_type n chm bio bc chm>bio|bc bio>chm|bc bc>chm|bio Any
tox
chr 1115 97 133 67 75 107 21 203
dev 712 63 138 50 43 117 7 167
dnt 84 16 26 15 5 20 2 27
mgr 668 78 126 56 46 106 4 156
rep 34 0 0 0 0 0 0 0
sac 128 2 7 6 1 6 2 9
sub 922 107 140 78 64 122 21 212
All 3663 363 570 272 234 478 57 774
In [429]:
#AUC_RES['P%']=100.0*AUC_RES.Any/AUC_RES.n
#AUC_RES.ix['P%']=100.0*AUC_RES.ix['All'] /AUC_RES.n.ix['All'] 
AUC_RES
Out[429]:
sim_type n chm bio bc chm>bio|bc bio>chm|bc bc>chm|bio Any
tox
chr 1115 97 133 67 75 107 21 203
dev 712 63 138 50 43 117 7 167
dnt 84 16 26 15 5 20 2 27
mgr 668 78 126 56 46 106 4 156
rep 34 0 0 0 0 0 0 0
sac 128 2 7 6 1 6 2 9
sub 922 107 140 78 64 122 21 212
All 3663 363 570 272 234 478 57 774
In [405]:
AUC_RES_P=AUC_RES.apply(lambda x: 100.0*x/AUC_RES.n)
AUC_RES_P=np.round(AUC_RES_P,decimals=0)
Out[405]:
sim_type n chm bio bc chm>bio|bc bio>chm|bc bc>chm|bio Any
tox
chr 100 9 12 6 7 10 2 18
dev 100 9 19 7 6 16 1 23
dnt 100 19 31 18 6 24 2 32
mgr 100 12 19 8 7 16 1 23
rep 100 0 0 0 0 0 0 0
sac 100 2 5 5 1 5 2 7
sub 100 12 15 8 7 13 2 23
All 100 10 16 7 6 13 2 21
In [428]:
AUC_RES_P.sort('Any',ascending=False)
Out[428]:
sim_type n chm bio bc chm>bio|bc bio>chm|bc bc>chm|bio Any
tox
dnt 100 19.047619 30.952381 17.857143 5.952381 23.809524 2.380952 32.142857
dev 100 8.848315 19.382022 7.022472 6.039326 16.432584 0.983146 23.455056
mgr 100 11.676647 18.862275 8.383234 6.886228 15.868263 0.598802 23.353293
sub 100 11.605206 15.184382 8.459870 6.941432 13.232104 2.277657 22.993492
All 100 9.909910 15.561016 7.425607 6.388206 13.049413 1.556102 21.130221
chr 100 8.699552 11.928251 6.008969 6.726457 9.596413 1.883408 18.206278
sac 100 1.562500 5.468750 4.687500 0.781250 4.687500 1.562500 7.031250
rep 100 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
In [421]:
X = np.round(AUC_RES,decimals=0).copy()
for c in X.columns: X[c] = X[c].astype(np.uint)
for c in X.columns: X[c] = X[c].astype(np.str)

Y = np.round(AUC_RES_P,decimals=0).copy()
for c in Y.columns: Y[c] = Y[c].astype(np.uint)
for c in Y.columns: Y[c] = Y[c].astype(np.str)
Z = X+ ' / ' + Y + '%'
Z['n']=X.n
Z
Out[421]:
sim_type n chm bio bc chm>bio|bc bio>chm|bc bc>chm|bio Any
tox
chr 1115 97 / 9% 133 / 12% 67 / 6% 75 / 7% 107 / 10% 21 / 2% 203 / 18%
dev 712 63 / 9% 138 / 19% 50 / 7% 43 / 6% 117 / 16% 7 / 1% 167 / 23%
dnt 84 16 / 19% 26 / 31% 15 / 18% 5 / 6% 20 / 24% 2 / 2% 27 / 32%
mgr 668 78 / 12% 126 / 19% 56 / 8% 46 / 7% 106 / 16% 4 / 1% 156 / 23%
rep 34 0 / 0% 0 / 0% 0 / 0% 0 / 0% 0 / 0% 0 / 0% 0 / 0%
sac 128 2 / 2% 7 / 5% 6 / 5% 1 / 1% 6 / 5% 2 / 2% 9 / 7%
sub 922 107 / 12% 140 / 15% 78 / 8% 64 / 7% 122 / 13% 21 / 2% 212 / 23%
All 3663 363 / 10% 570 / 16% 272 / 7% 234 / 6% 478 / 13% 57 / 2% 774 / 21%
In [299]:
CL_vus10={}
for (cl,tox),X in VUS_sig.groupby(level=[0,1]):
    #print cl,tox,X.shape
    #X.sort(['chm','bio','bc'],ascending=False,inplace=True)
    CL_vus10[cl]=[i[2] for i in X.index]
In [270]:
#VUS_sig=VUS.ix[(VUS>0.50).any(axis=1)]
X1=pd.melt(VUS.reset_index(),id_vars=['cl','tox','effect'],value_vars=['chm','bio','bc'],var_name='sim_type',value_name='VUS')
X1.set_index(['cl','tox','effect','sim_type'],inplace=True)

X2=P0[['auc','AUC','Sens','Spec','GenRA0']].reset_index().set_index(['cl','tox','effect','sim_type'])
X2.columns=map(mkcol,X2.columns)
P2 = pd.merge(X2,X1,left_index=True,right_index=True)
P2['tested_n']=P2.n_pos+P2.n_neg
P2['untested_n']=P2.cl_n-P2.tested_n
P21 = P2[['cl_n','untested_n','tested_n','n_pos','n_neg','VUS','AUC','Sens','Spec']]
In [271]:
P21.ix[:10]
#P0.ix[:10]
#P0[['auc','AUC','Sens','Spec','GenRA0']].reset_index().set_index(['cl','tox','effect','sim_type']).ix[:10]
P2.ix[:10]
Out[271]:
n_pos n_neg cl_n auc_mn auc_sd AUC Sens Spec GenRA0 VUS tested_n untested_n
cl tox effect sim_type
0 dev dev_bone bc 2 1 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.016269 3 10
bio 2 1 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.052765 3 10
chm 2 1 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.040016 3 10
dev_clinical_signs bc 2 1 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.016269 3 10
bio 2 1 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.052765 3 10
chm 2 1 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 2.00(0.0) 0.040016 3 10
dev_food_consumption bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_maternal_wastage bc 1 2 13 0.291667 0.437117 0.29(0.4) 0.25(0.4) 1.00(0.0) 1.24(0.2) 0.056750 3 10
bio 1 2 13 0.527778 0.491999 0.53(0.5) 0.50(0.5) 1.00(0.0) 1.15(0.2) 0.200244 3 10
chm 1 2 13 0.407407 0.445883 0.41(0.4) 0.33(0.5) 1.00(0.0) 1.22(0.2) 0.205916 3 10
dev_mouth_/_jaw bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_offspring_survival-early bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_offspring_survival-late bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_reproductive_performance bc 1 2 13 0.062500 0.109789 0.06(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.016269 3 10
bio 1 2 13 0.138889 0.125988 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.052765 3 10
chm 1 2 13 0.083333 0.118958 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.040016 3 10
dev_sexual_developmental_landmark bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
dev_sperm_morphology bc 1 2 13 0.083333 0.119523 0.08(0.1) 0.00(0.0) 1.00(0.0) 1.76(0.2) 0.023095 3 10
bio 1 2 13 0.125000 0.126773 0.12(0.1) 0.00(0.0) 1.00(0.0) 1.85(0.2) 0.047488 3 10
chm 1 2 13 0.138889 0.125392 0.14(0.1) 0.00(0.0) 1.00(0.0) 1.78(0.2) 0.146423 3 10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10 sub sub_skeletal_muscle bc 1 8 35 0.702778 0.382072 0.70(0.4) 0.60(0.5) 1.00(0.0) 1.06(0.1) 0.675739 9 26
bio 1 8 35 0.637153 0.378501 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.36(0.4) 0.251533 9 26
chm 1 8 35 0.644676 0.380429 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.06(0.1) 0.570717 9 26
sub_skin bc 1 8 35 0.702778 0.382072 0.70(0.4) 0.60(0.5) 1.00(0.0) 1.06(0.1) 0.675739 9 26
bio 1 8 35 0.637153 0.378501 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.36(0.4) 0.251533 9 26
chm 1 8 35 0.644676 0.380429 0.64(0.4) 0.50(0.5) 1.00(0.0) 1.06(0.1) 0.570717 9 26
sub_spleen bc 1 8 35 0.716667 0.376323 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.04(0.1) 0.647340 9 26
bio 1 8 35 0.677083 0.348626 0.68(0.3) 0.50(0.5) 1.00(0.0) 1.07(0.1) 0.273003 9 26
chm 1 8 35 0.511574 0.377561 0.51(0.4) 0.33(0.5) 1.00(0.0) 1.21(0.2) 0.393840 9 26
sub_stomach bc 2 7 35 0.648413 0.363415 0.65(0.4) 0.57(0.4) 0.99(0.1) 0.98(0.3) 0.508003 9 26
bio 2 7 35 0.738095 0.286224 0.74(0.3) 0.67(0.5) 0.91(0.2) 0.92(0.4) 0.274908 9 26
chm 2 7 35 0.414021 0.349274 0.41(0.3) 0.44(0.5) 0.84(0.3) 1.13(0.6) 0.331652 9 26
sub_testes bc 4 5 35 0.767222 0.265048 0.77(0.3) 0.75(0.3) 0.93(0.1) 0.85(0.3) 0.637498 9 26
bio 4 5 35 0.707639 0.312868 0.71(0.3) 0.73(0.4) 0.86(0.3) 1.05(0.6) 0.257348 9 26
chm 4 5 35 0.664352 0.222846 0.66(0.2) 0.59(0.3) 0.98(0.1) 0.94(0.2) 0.519127 9 26
sub_thymus bc 3 6 35 0.623457 0.401996 0.62(0.4) 0.56(0.4) 0.99(0.0) 1.07(0.3) 0.558693 9 26
bio 3 6 35 0.668981 0.341912 0.67(0.3) 0.63(0.5) 0.91(0.2) 0.97(0.4) 0.245487 9 26
chm 3 6 35 0.500000 0.337500 0.50(0.3) 0.46(0.4) 0.91(0.2) 1.11(0.4) 0.440463 9 26
sub_trachea bc 1 8 35 0.716667 0.376323 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.04(0.1) 0.647340 9 26
bio 1 8 35 0.677083 0.348626 0.68(0.3) 0.50(0.5) 1.00(0.0) 1.07(0.1) 0.273003 9 26
chm 1 8 35 0.511574 0.377561 0.51(0.4) 0.33(0.5) 1.00(0.0) 1.21(0.2) 0.393840 9 26
sub_urinalysis bc 1 8 35 0.719444 0.359930 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.09(0.1) 0.681676 9 26
bio 1 8 35 0.619792 0.404046 0.62(0.4) 0.50(0.5) 1.00(0.0) 1.13(0.2) 0.252956 9 26
chm 1 8 35 0.780093 0.332661 0.78(0.3) 0.67(0.5) 1.00(0.0) 1.05(0.1) 0.668352 9 26
sub_uterus bc 2 7 35 0.873016 0.217849 0.87(0.2) 0.93(0.2) 0.91(0.2) 0.78(0.4) 0.740531 9 26
bio 2 7 35 0.760913 0.297195 0.76(0.3) 0.92(0.3) 0.80(0.3) 0.73(0.5) 0.315979 9 26
chm 2 7 35 0.889550 0.172507 0.89(0.2) 0.84(0.2) 0.97(0.1) 0.77(0.4) 0.670975 9 26
sub_vagina bc 1 8 35 0.719444 0.359930 0.72(0.4) 0.60(0.5) 1.00(0.0) 1.09(0.1) 0.681676 9 26
bio 1 8 35 0.619792 0.404046 0.62(0.4) 0.50(0.5) 1.00(0.0) 1.13(0.2) 0.252956 9 26
chm 1 8 35 0.780093 0.332661 0.78(0.3) 0.67(0.5) 1.00(0.0) 1.05(0.1) 0.668352 9 26

1467 rows × 12 columns

In [346]:
#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Output clusters to worksheet
W = pd.ExcelWriter(RES_DIR+'/genra-perf-summary-full-%s.xls' % tmstmp,engine='openpyxl')
for cl,P in P2.groupby(level=[0]):
    #X = P.reset_index()
    #I2= (X1<0.1).sum(axis=1)>0
    #X.columns = [i.replace('max','') for i in map(mkcol,X.columns)]
    P.to_excel(W,sheet_name="Cluster-%d" %cl,na_rep='')
W.save()
    

Figures and Results

In [585]:
C2.ix[C2.index[C2.chemical_name.apply(lambda x: x.lower().find('benox')>-1)]]
Out[585]:
DSSTox_GSID chemical_casrn chemical_name
ID
C98730042 29565 98730-04-2 Benoxacor
C5987826 45393 5987-82-6 Benoxinate hydrochloride
C7421401 45572 7421-40-1 Carbenoxolone sodium
In [588]:
C2M['C98730042']
Out[588]:
80
In [557]:
# Cluster 0 
X1=PERF_vol.xs(cc_id)
#len([i[1] for i in X11.index])
#X1.tox.unique(),len(X1.tox.unique()),len(X1.effect.unique())
X1
Out[557]:
sim_type bc bio chm
tox effect
chr chr_adrenal_gland 0.312603 0.375728 0.288374
chr_bone 0.322365 0.302694 0.185011
chr_bone_marrow 0.452572 0.451387 0.360484
chr_brain 0.414691 0.550617 0.361929
chr_clinical_chemistry 0.559710 0.568432 0.634867
chr_clinical_signs 0.319269 0.548075 0.316050
chr_eye 0.322365 0.302694 0.185011
chr_food_consumption 0.264522 0.541604 0.252762
chr_harderian_gland 0.322365 0.302694 0.185011
chr_heart 0.414691 0.550617 0.361929
chr_hematology 0.496921 0.537500 0.443887
chr_kidney 0.357352 0.578639 0.270581
chr_liver 0.563700 0.670874 0.526654
chr_lung 0.363292 0.525132 0.328286
chr_mammary_gland 0.182220 0.225096 0.105629
chr_mortality 0.226409 0.387818 0.068247
chr_nerve 0.322365 0.302694 0.185011
chr_ovary 0.401035 0.504559 0.318494
chr_pancreas 0.182220 0.225096 0.105629
chr_pituitary_gland 0.450284 0.482156 0.325381
chr_prostate 0.182220 0.225096 0.105629
chr_skeletal_muscle 0.192328 0.197086 0.147820
chr_spinal_cord 0.322365 0.302694 0.185011
chr_spleen 0.363292 0.525132 0.328286
chr_stomach 0.182220 0.225096 0.105629
chr_testes 0.539835 0.485364 0.416178
chr_thymus 0.412400 0.513296 0.372169
chr_thyroid_gland 0.404627 0.325637 0.111829
chr_tissue_nos 0.192328 0.197086 0.147820
chr_uncertain_primary_site 0.182220 0.225096 0.105629
... ... ... ... ...
mgr mgr_urinary_bladder 0.286215 0.347127 0.131438
mgr_uterus 0.178155 0.177940 0.102234
sub sub_adrenal_gland 0.346024 0.472219 0.161727
sub_body_weight 0.345613 0.498366 0.174434
sub_bone_marrow 0.327716 0.265147 0.188561
sub_brain 0.346024 0.472219 0.161727
sub_cervix 0.152862 0.180177 0.095546
sub_clinical_chemistry 0.686736 0.786940 0.526380
sub_clinical_signs 0.395908 0.545049 0.202484
sub_epididymis 0.152862 0.180177 0.095546
sub_eye 0.327716 0.265147 0.188561
sub_food_consumption 0.324200 0.451884 0.209787
sub_heart 0.346024 0.472219 0.161727
sub_hematology 0.326182 0.300658 0.244393
sub_intestine_large 0.146224 0.221300 0.077283
sub_kidney 0.346024 0.472219 0.161727
sub_lung 0.346024 0.472219 0.161727
sub_mortality 0.152862 0.180177 0.095546
sub_ovary 0.327716 0.265147 0.188561
sub_pituitary_gland 0.342407 0.448727 0.191974
sub_prostate 0.152862 0.180177 0.095546
sub_salivary_glands 0.152862 0.180177 0.095546
sub_spleen 0.346024 0.472219 0.161727
sub_stomach 0.262148 0.271942 0.226201
sub_testes 0.346024 0.472219 0.161727
sub_thymus 0.346024 0.472219 0.161727
sub_thyroid_gland 0.272395 0.437325 0.166180
sub_urinalysis 0.504888 0.660129 0.327908
sub_uterus 0.152862 0.180177 0.095546
sub_water_consumption 0.152862 0.180177 0.095546

107 rows × 3 columns

In [569]:
VUS_sig=VUS.ix[(VUS>0.5).any(axis=1)]
tox1=[i[1] for i in VUS_sig.xs(cc_id).index]
#tox1
tox1=[i for i in tox1 if re.match('chr_|dev_',i)]
tox1
Out[569]:
['chr_brain',
 'chr_clinical_chemistry',
 'chr_clinical_signs',
 'chr_food_consumption',
 'chr_heart',
 'chr_hematology',
 'chr_kidney',
 'chr_liver',
 'chr_lung',
 'chr_ovary',
 'chr_spleen',
 'chr_testes',
 'chr_thymus',
 'chr_urinalysis',
 'dev_body_weight',
 'dev_bone',
 'dev_clinical_signs',
 'dev_food_consumption',
 'dev_kidney',
 'dev_liver',
 'dev_maternal_wastage',
 'dev_mortality',
 'dev_mouth_/_jaw',
 'dev_reproductive_performance']
In [571]:
#CL 23
tox1=['chr_brain',
 'chr_heart',
 'chr_kidney',
 'chr_liver',
 'chr_lung',
 'chr_ovary',
 'chr_spleen',
 'chr_testes',
 'chr_thymus',
 'dev_bone',
 'dev_kidney',
 'dev_liver',
 'dev_reproductive_performance']
In [593]:
#cl 80
tox1="""chr_brain
chr_kidney
mgr_body_weight
mgr_bone
mgr_intestine_large
mgr_urinary_bladder
sub_intestine_small
sub_prostate
sub_spleen
sub_urinalysis
sub_water_consumption""".split("\n")
tox1
Out[593]:
['chr_brain',
 'chr_kidney',
 'mgr_body_weight',
 'mgr_bone',
 'mgr_intestine_large',
 'mgr_urinary_bladder',
 'sub_intestine_small',
 'sub_prostate',
 'sub_spleen',
 'sub_urinalysis',
 'sub_water_consumption']
In [517]:
X=(BCTb.ix[CID,tox1]>0).sum(axis=1)
X.sort(ascending=False)
X[:10]
CID.index(X.index[1])
Out[517]:
309
In [158]:
tox1=['dev_food_consumption',
 'dev_sexual_developmental_landmark',
 'dev_sperm_morphology',
 'mgr_brain',
 'mgr_motor_activity',
 'sub_lung',
 'sub_sperm_morphology',
 'sub_testes',
 'sub_thymus'
 ]
In [355]:
# Cl = 5
tox1 = """dev_clinical_chemistry
dev_mortality
dev_reproductive_performance
dev_spleen""".split('\n')
tox1
Out[355]:
['dev_clinical_chemistry',
 'dev_mortality',
 'dev_reproductive_performance',
 'dev_spleen']
In [443]:
', '.join(tox1)
Out[443]:
'chr_body_weight, dev_food_consumption, dev_ureter, mgr_sexual_developmental_landmark, mgr_water_consumption, sub_estrous_cycle, sub_eye, sub_nose, sub_preputial_gland, sub_skeletal_muscle, sub_sperm_measure, sub_tongue, sub_urinalysis'
In [94]:
FIG_DIR
Out[94]:
'/share/home/ishah/projects/Chem/figs/readacross/'
In [106]:
"""
dev_food_consumption                 0.319472
dev_sexual_developmental_landmark    0.319472
dev_sperm_morphology                 0.319472
mgr_brain                            0.805017
mgr_motor_activity                   0.194983
sub_lung                             0.295062
sub_sperm_morphology                 0.295062
sub_testes                           0.295062
sub_thymus                           0.295062
"""
Out[106]:
'\ndev_food_consumption                 0.319472\ndev_sexual_developmental_landmark    0.319472\ndev_sperm_morphology                 0.319472\nmgr_brain                            0.805017\nmgr_motor_activity                   0.194983\nsub_lung                             0.295062\nsub_sperm_morphology                 0.295062\nsub_testes                           0.295062\nsub_thymus                           0.295062\n'
In [594]:
CID.index('C98730042')
Out[594]:
19
In [598]:
from chem.ra.viz import *
from chem.ra.readacross import *
SM0 = getNNSimMatrices(CID,BCTb,C=Chm,B=Bio)
CH0 = getNNChmNm(CID,ChmNm1)
fig = pl.figure(figsize=(15,15))
ax = pl.subplot(1,1,1)
X,E1 = vizChmNN(CID[19],BCTb,SM0,dsc='Bio',ax=ax,c_s0=0,chem_sz=(90,60),c_knn=10,
                rs=1.0,r_min=150,
                th0=0.5*math.pi,th_tot=1.3*math.pi,
                lab_len=15,ch_fs=12,
                Effects=BCTb.ix[CID,tox1],CN=CH0,
                pred=True,t0=0.25,
                CD=CD,Mols=MOLS,
                save=True,fig_dir=FIG_DIR)
E1
Out[598]:
chr_brain                1
chr_kidney               1
mgr_body_weight          1
mgr_bone                 1
mgr_intestine_large      0
mgr_urinary_bladder      1
sub_intestine_small      0
sub_prostate             1
sub_spleen               1
sub_urinalysis           1
sub_water_consumption    1
Name: C98730042, dtype: float64
In [597]:
from chem.ra.viz import *
from chem.ra.readacross import *
SM0 = getNNSimMatrices(CID,BCTb,C=Chm,B=Bio)
CH0 = getNNChmNm(CID,ChmNm1)
fig = pl.figure(figsize=(15,15))
ax = pl.subplot(1,1,1)
X,E1 = vizChmNN(CID[19],BCTb,SM0,dsc='Chm',ax=ax,c_s0=0,chem_sz=(90,60),c_knn=10,
                rs=2,r_min=150,
                th0=0.5*math.pi,th_tot=1.3*math.pi,
                lab_len=15,ch_fs=12,
                Effects=BCTb.ix[CID,tox1],CN=CH0,
                pred=True,t0=None,
                CD=CD,Mols=MOLS,
                save=True,fig_dir=FIG_DIR)
E1
Out[597]:
chr_brain                0
chr_kidney               1
mgr_body_weight          1
mgr_bone                 0
mgr_intestine_large      0
mgr_urinary_bladder      0
sub_intestine_small      0
sub_prostate             0
sub_spleen               0
sub_urinalysis           0
sub_water_consumption    0
Name: C98730042, dtype: float64
In [ ]:
 
In [122]:
S0 = SM0['Chm'].ix[CID[1]]
S0.sort()
S0
Out[122]:
C4151502     0.239316
C29420493    0.260417
C2795393     0.282828
C754916      0.282828
C3871996     0.288660
C1763231     0.322581
C3825261     0.632353
C335762      0.909091
C375951      0.909091
C335671      0.909091
C2058948     0.909091
C375859      0.943396
C307244      1.000000
Name: C307244, dtype: float64
In [498]:
X = GenRAPred('mgr_body_weight', BCTb.ix[CID], SM0['Chm'],sim_type='chm',k0=8,s0=0.5,t0=None)
In [545]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.tri as mtri
import warnings
warnings.filterwarnings('ignore')

show_t0 = True
def smoothData(xi,yi,zi,out='ij'):
    x1,y1=np.mgrid[xi.min():xi.max():30j,yi.min():yi.max():30j]
    f = bisplrep(xi,yi,zi,s=1)
    z1= bisplev(x1[:,0],y1[0,:],f)
    
    if out =='ij':
        return np.concatenate(x1),np.concatenate(y1),np.concatenate(z1)
    else:
        return x1,y1,z1
    
#for cl in sorted([0,1,5]):
cl=cc_id
print "> CL",cl
j=0
#if cl > 5: break
VUS_sig=VUS.ix[(VUS>0.6).any(axis=1)]
#tox1=[i[1] for i in VUS_sig.xs().index]

fig = pl.figure(figsize=(15,4*len(tox1)))
for y in tox1:
    for ft in ['chm','bio','bc']:
        X = P_ALL1.xs((cl,ft,y)).reset_index()

        X['n_nn'] = X.n_sim_neg+X.n_sim_pos
        X.t0_max[X.t0_max>1]=1
        j+=1

        try:
            ax = fig.add_subplot(len(tox1)+1,3, j)

            x2,y2,z2 = smoothData(X.n_nn, X.s0, X.auc,out='xy')
            z2[z2<0]=0
            z2[z2>1]=X.auc.max()
            cf = ax.contourf(x2, y2, z2, 30,cmap=cm.coolwarm,vmin=0,vmax=1,alpha=0.9)

            x_min=X.n_nn.min()
            x_max=X.n_nn.max()
            ax.set_xlim(x_min,x_max)
            ax.set_ylim(0,X.s0.max())
            ax.set_xlabel('k',fontdict=dict(size=12))
            ax.set_ylabel('s',fontdict=dict(size=12))
            pl.setp(ax.get_xticklabels(), fontsize=8)
            pl.setp(ax.get_yticklabels(), fontsize=8)
            ax.set_title("CL-%d %s %s" % (cl,ft, y),fontdict=dict(size=14))
            if show_t0:
                x3,y3,z3 = smoothData(X.n_nn, X.s0, X.t0_max,out='xy')
                levels = np.linspace(X.t0_max.min(),X.t0_max.max(),8)
                cf2 = ax.contour(x3, y3, z3, levels,linewidths=3,cmap=cm.hot,alpha=0.8)
                pl.clabel(cf2, fmt='%2.1f', inline=1, fontsize=13)

        except:
            #print ">CL",cl,y,ft,"failed"
            continue
        else:
            #print ">CL",cl,y,ft,"passed"
            pass


ax = fig.add_subplot(len(tox1)+1,3, j+1)
ax.set_axis_off()
cb = pl.colorbar(cf,orientation='horizontal',ticks=np.linspace(0,1,6),shrink=1.5)
cb.ax.set_xlabel('ROC AUC',fontdict=dict(size=12,weight='bold'))
pl.tight_layout()
pl.subplots_adjust(wspace=0.3,hspace=0.3)

#fig.savefig(FIG_DIR+'genra-pred-mean-auc-k0-s0-cl-%d-nn.svg' % cl)
fig.savefig(FIG_DIR+'genra-pred-mean-auc-k0-s0-cl-%d-nn.png' % cl,dpi=500)
> CL 35
In [537]:
PERF1['res2'].select(lambda x: x[0]==cc_id and x[2] in tox1)
Out[537]:
sim_type bc bio chm
cl tox effect cl_n n_perf
35 chr chr_body_weight 369 105 0.60 1.00 *(92+,13-,369) 0.66 1.00 *(92+,13-,369) 0.65 1.00 *(92+,13-,369)
chr_heart 369 105 0.59 0.96 *(27+,78-,369) 0.64 0.98 *(27+,78-,369) 0.66 0.98 *(27+,78-,369)
chr_liver 369 105 0.66 0.98 *(76+,29-,369) 0.64 1.00 *(76+,29-,369) 0.68 1.00 *(76+,29-,369)
dev dev_body_weight 369 106 0.60 1.00 *(103+,3-,369) 0.66 1.00 *(103+,3-,369) 0.53 1.00 *(103+,3-,369)
dev_bone 369 106 0.66 1.00 *(62+,44-,369) 0.62 0.97 *(62+,44-,369) 0.67 1.00 *(62+,44-,369)
dev_clinical_signs 369 106 0.74 1.00 *(77+,29-,369) 0.66 1.00 *(77+,29-,369) 0.73 1.00 *(77+,29-,369)
dev_food_consumption 369 106 0.65 0.98 *(77+,29-,369) 0.61 0.99 *(77+,29-,369) 0.63 1.00 *(77+,29-,369)
dev_intestine_small 369 106 0.45 1.00 *(3+,103-,369) 0.65 1.00 *(3+,103-,369) 0.45 1.00 *(3+,103-,369)
dev_lung 369 106 0.61 1.00 *(9+,97-,369) 0.72 1.00 *(9+,97-,369) 0.61 0.94 *(9+,97-,369)
dev_maternal_wastage 369 106 0.66 0.98 *(38+,68-,369) 0.63 1.00 *(38+,68-,369) 0.68 0.99 *(38+,68-,369)
dev_stomach 369 106 0.54 1.00 *(18+,88-,369) 0.62 0.97 *(18+,88-,369) 0.61 0.97 *(18+,88-,369)
In [151]:
VUS.select(lambda x: x[0]==0 and x[2] in tox1)[['chm','bio','bc']]
Out[151]:
s0 k0 n_chem act_wt auc auc_pval ba_max fpr_max n_neg n_pos n_sim_neg n_sim_pos sn_max sp_max t0_max cl_n
0 0.8 1 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
1 0.8 2 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
2 0.8 3 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
3 0.8 4 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
4 0.8 5 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
5 0.8 6 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
6 0.8 7 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
7 0.8 8 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
8 0.8 9 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
9 0.7 1 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
10 0.7 2 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
11 0.7 3 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
12 0.7 4 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
13 0.7 5 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
14 0.7 6 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
15 0.7 7 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
16 0.7 8 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
17 0.7 9 2 1 1.000 0.00 1.0 0 4 1 2 0 1 1 1.000000 13
18 0.6 1 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
19 0.6 2 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
20 0.6 3 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
21 0.6 4 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
22 0.6 5 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
23 0.6 6 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
24 0.6 7 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
25 0.6 8 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
26 0.6 9 4 1 1.000 0.00 1.0 0 4 1 4 0 1 1 1.000000 13
27 0.5 1 5 1 0.500 0.00 0.5 0 4 1 4 1 0 1 1.000000 13
28 0.5 2 5 1 0.375 0.18 0.5 0 4 1 4 1 0 1 1.366529 13
29 0.5 3 5 1 0.375 0.22 0.5 0 4 1 4 1 0 1 1.366529 13
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51 0.3 7 5 1 0.250 0.34 0.5 0 4 1 4 1 0 1 1.334706 13
52 0.3 8 5 1 0.250 0.48 0.5 0 4 1 4 1 0 1 1.334706 13
53 0.3 9 5 1 0.250 0.44 0.5 0 4 1 4 1 0 1 1.334706 13
54 0.2 1 5 1 0.500 0.00 0.5 0 4 1 4 1 0 1 1.000000 13
55 0.2 2 5 1 0.250 0.36 0.5 0 4 1 4 1 0 1 1.366529 13
56 0.2 3 5 1 0.125 0.68 0.5 0 4 1 4 1 0 1 1.295062 13
57 0.2 4 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
58 0.2 5 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
59 0.2 6 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
60 0.2 7 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
61 0.2 8 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
62 0.2 9 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
63 0.1 1 5 1 0.500 0.00 0.5 0 4 1 4 1 0 1 1.000000 13
64 0.1 2 5 1 0.250 0.46 0.5 0 4 1 4 1 0 1 1.366529 13
65 0.1 3 5 1 0.125 0.60 0.5 0 4 1 4 1 0 1 1.295062 13
66 0.1 4 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
67 0.1 5 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
68 0.1 6 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
69 0.1 7 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
70 0.1 8 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
71 0.1 9 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
72 0.0 1 5 1 0.500 0.00 0.5 0 4 1 4 1 0 1 1.000000 13
73 0.0 2 5 1 0.250 0.44 0.5 0 4 1 4 1 0 1 1.366529 13
74 0.0 3 5 1 0.125 0.56 0.5 0 4 1 4 1 0 1 1.295062 13
75 0.0 4 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
76 0.0 5 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
77 0.0 6 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
78 0.0 7 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
79 0.0 8 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13
80 0.0 9 5 1 0.000 NaN 0.5 0 4 1 4 1 0 1 1.253177 13

81 rows × 16 columns

Supplemental Data Files

In [54]:
SUPDAT_DIR='/share/home/ishah/Dropbox/Projects/Chem/docs/RTP-RA1/SupplData/'
In [ ]:
# Data S1
In [60]:
# Data S2

tsv=SUPDAT_DIR+'supplemental-data-DS2-toxicity.tsv'
#W = pd.ExcelWriter(xl)
BCTc[Tox].to_csv(tsv,sep='\t')
In [174]:
#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Output clusters to worksheet
W = pd.ExcelWriter('/share/home/ishah/projects/Chem/data/results/genra-vus-%s-all.xls' % tmstmp,engine='openpyxl')
for cl in sorted(set([i[0] for i in PERF_vol.index])):
    X = PERF_vol.xs(cl)['auc_volf1']
    X.to_excel(W,sheet_name="Cluster-%d" %cl)
W.save()
    
In [265]:
P21.ix[30:32]
Out[265]:
cl_n untested_n tested_n n_pos n_neg VUS AUC Sens Spec
cl tox effect sim_type
0 mgr mgr_adrenal_gland bc 13 9 4 2 2 0.090977 0.31(0.3) 0.39(0.4) 0.86(0.2)
bio 13 9 4 2 2 0.097950 0.24(0.2) 0.30(0.5) 0.85(0.2)
In [235]:
#P.T.to_excel('/share/home/ishah/projects/Chem/data/results/genra-summary-%s.xls' % tmstmp)

# Output clusters to worksheet
W = pd.ExcelWriter('/share/home/ishah/projects/Chem/data/results/genra-perf-all-%s.xls' % tmstmp,engine='openpyxl')
for cl in sorted(set([i[0] for i in PERF1.index])):
    X = PERF1.xs(cl)['res2']
    X.to_excel(W,sheet_name="Cluster-%d" %cl)
W.save()
In [353]:
W = pd.ExcelWriter('/share/home/ishah/projects/Chem/data/results/genra-perf-sig-%s.xls' % tmstmp,engine='openpyxl')

X0 = PERF1['auc'][['chm','bio','bc']]
X = (X0>0.8).groupby(level=[0,1]).sum()
#X['Any bio|chm|bc'] = (X0>0).any(axis=1)
X['Only chm'] = X0.apply(lambda x: x[0]>=0.8 and x[1]<0.8 and x[2]<0.8,axis=1).groupby(level=[0,1]).sum()
X['Only bio'] = X0.apply(lambda x: x[0]< 0.8 and x[1]>=0.8 and x[2]<0.8,axis=1).groupby(level=[0,1]).sum()
X['Only bc']  = X0.apply(lambda x: x[0]< 0.8 and x[1]<0.8 and x[2]>=0.8,axis=1).groupby(level=[0,1]).sum()
X['chm>bio|bc'] = X0.apply(lambda x: x[0]>=0.8 and x[0]>x[1] and x[0]>x[2],axis=1).groupby(level=[0,1]).sum()
X['bio>chm|bc'] = X0.apply(lambda x: x[1]>=0.8 and x[1]>x[0] and x[1]>x[2],axis=1).groupby(level=[0,1]).sum()
X['bc>chm|bio'] = X0.apply(lambda x: x[2]>=0.8 and x[2]>x[0] and x[2]>x[1],axis=1).groupby(level=[0,1]).sum()

X.to_excel(W,sheet_name="Summary")

for cl in sorted(set([i[0] for i in PERF1.index])):
    X=PERF1.xs(cl)
    I = np.where((X['auc']>0.8).sum(axis=1)>0)
    if I and len(I[0])>0:
        X.res2[['chm','bio','bc']].ix[I].to_excel(W,sheet_name="Cluster-%d" %cl)
W.save()