# -*- coding: utf-8 -*-
"""
Created on Thu Apr 4
@author: gregoire

Python 3.6.2
pandas 0.20.3
numpy 1.13.1

Set the release folder as the cwd before running.

Prints a csv table summarizing the pvds plates, inkj plates and inkj samples
in an HTEJCAP database release, using the 2 summary tables plate.csv and
composition.csv found in the "plate" sub-folder of the release.
"""

import pandas as pd
import numpy as np
import os

# Classification keys shared by all three subtables.
print_class_keys = ['pvds', 'inkj']
# 6 anion classes; 'other' means none of the first 5 (typically intended to
# be metals).
anion_class_keys = ['O', 'S', 'N', 'OS', 'ON', 'other']
# Number of anion elements implied by each anion class.
_anions_per_class = {'O': 1, 'S': 1, 'N': 1, 'OS': 2, 'ON': 2, 'other': 0}
# Cations are classified by count: one class per value plus a ">max" class.
n_cation_vals = range(1, 5)
cation_class_keys = ['%d' % nc for nc in n_cation_vals] + \
    ['>%d' % n_cation_vals[-1]]


def _sample_composition_systems(df2):
    """Return (composition system strings, cation counts) per inkj sample.

    The columns of df2 include the ink loading which, after accounting for
    cation concentration in the ink, yields the concentration of each cation
    in the ``*.PM.AtFrac`` columns. A sample's composition system is the
    '-'-joined list of elements with a positive fraction, ordered by sorted
    column name, i.e. a string similar to df['composition_system']. Individual
    inkj samples can therefore have a subsystem of the plate's comp. space.
    """
    atfrac_keys = [k for k in sorted(df2.keys()) if k.endswith('.PM.AtFrac')]
    # n smps x n els; np.array (not asarray) guarantees a writable copy.
    loadings = np.array(df2[atfrac_keys], dtype=np.float32)
    loadings[np.isnan(loadings)] = 0.
    present = loadings > 0.
    symbols = np.array([k.partition('.')[0] for k in atfrac_keys])
    systems = np.array(['-'.join(symbols[row]) for row in present])
    return systems, present.sum(axis=1)


def _classify_plates(df):
    """Return a dict of per-plate arrays keyed by classification name.

    Keys are the entries of print_class_keys, anion_class_keys and
    cation_class_keys (boolean masks) plus 'num_anions' and 'num_cations'
    (integer counts).
    """
    classif_d = {}
    for pk in print_class_keys:
        classif_d[pk] = np.asarray(df['print_type'] == pk)

    # NOTE(review): assumes the contains_* columns are 0/1 or boolean flags.
    has_o, has_s, has_n = [np.asarray(df[col]).astype(bool) for col in
                           ('contains_O', 'contains_S', 'contains_N')]
    classif_d['O'] = has_o & ~has_s & ~has_n
    classif_d['S'] = has_s & ~has_o & ~has_n
    classif_d['N'] = has_n & ~has_o & ~has_s
    classif_d['OS'] = has_o & has_s & ~has_n
    classif_d['ON'] = has_o & has_n & ~has_s
    named_classes = [classif_d[ak] for ak in anion_class_keys[:-1]]
    classif_d['other'] = ~np.any(named_classes, axis=0)

    # The number of anions is needed to get the number of cations.
    classif_d['num_anions'] = np.sum(
        [classif_d[ak] * _anions_per_class[ak] for ak in anion_class_keys],
        axis=0)
    num_cations = np.asarray(df['num_elements']) - classif_d['num_anions']
    classif_d['num_cations'] = num_cations

    for nc in n_cation_vals:
        classif_d['%d' % nc] = num_cations == nc
    classif_d[cation_class_keys[-1]] = num_cations > n_cation_vals[-1]
    return classif_d


def _header_rows(title, row_label, last_label):
    """Return the two header rows (title line, column names) of a subtable."""
    return [['', title] + [''] * len(cation_class_keys),
            [row_label] + cation_class_keys + [last_label]]


def _plate_subtable(df, classif_d, pk):
    """Return rows counting plates and plate-level comp. systems for pk."""
    rows = _header_rows('num. %s libraries' % pk, 'num. cations in plate:',
                        'num. comp. sys.')
    comp_sys = np.asarray(df['composition_system'])
    for ak in anion_class_keys:
        plate_counts = []
        num_comp_sys = 0
        for ck in cation_class_keys:
            # plates matching all 3 classifications
            mask = classif_d[pk] & classif_d[ak] & classif_d[ck]
            plate_counts.append(mask.sum())
            # composition systems are strings with ordered elements, so the
            # number of unique strings is the number of unique plate-level
            # composition systems
            num_comp_sys += len(set(comp_sys[mask]))
        rows.append([ak] + ['%d' % c for c in plate_counts] +
                    ['%d' % num_comp_sys])
    return rows


def _sample_subtable(df, df2, classif_d, pk='inkj'):
    """Return rows counting samples and sample-level element combinations.

    Similar to the plate subtables but at the sample level. The pvds libraries
    are continuous libraries so every sample contains the full set of plate
    elements, but inkj samples can be subsystems of the plate elements, hence
    this table is built from the inkj sample table df2. That table doesn't
    include anions added via annealing, so the anion class is taken from the
    sample's plate.

    Samples with no positive cation loading have an empty composition system;
    they are not counted in any column nor as an element combination.
    """
    rows = _header_rows('num. %s samples' % pk, 'num. cations in sample:',
                        'num. unique element combinations')
    systems, n_cations = _sample_composition_systems(df2)
    has_cations = n_cations > 0
    for ak in anion_class_keys:
        # plate_ids from the plate table that are pk & in the anion class
        pids = df['plate_id'][classif_d[pk] & classif_d[ak]]
        # samples on those plates; assumes plate_id is unique in df
        selected = np.asarray(df2['plate_id'].isin(pids)) & has_cations
        n_sel = n_cations[selected]
        sample_counts = [(n_sel == nc).sum() for nc in n_cation_vals]
        sample_counts.append((n_sel > n_cation_vals[-1]).sum())
        rows.append([ak] + ['%d' % c for c in sample_counts] +
                    ['%d' % len(set(systems[selected]))])
    return rows


def build_summary_rows(df, df2):
    """Return the summary table as a list of rows (lists of strings).

    df has a row for each plate, including pvds and inkj-based synthesis.
    df2 has a row for each inkj (plate_id, sample_no), including reference
    samples.

    The table is made of one plate-level subtable per print type followed by
    the sample-level subtable for inkj.
    """
    classif_d = _classify_plates(df)
    csvvals = []
    for pk in print_class_keys:
        csvvals += _plate_subtable(df, classif_d, pk)
    # explicitly 'inkj' rather than whichever print type was looped over last
    csvvals += _sample_subtable(df, df2, classif_d, 'inkj')
    return csvvals


if __name__ == '__main__':
    # Separate path components keep the script usable outside Windows.
    df = pd.read_csv(os.path.join(os.getcwd(), 'plate', 'plate.csv'))
    df2 = pd.read_csv(os.path.join(os.getcwd(), 'plate', 'composition.csv'))
    csvvals = build_summary_rows(df, df2)
    print('\n'.join([','.join(row) for row in csvvals]))