path_ = os.getcwd()  # project root: the current working directory
_ = os.path.join(path_, "data/data.txt")  # path to the input sequence table


def _clear_files(directory, keep=()):
    """Delete every regular file found below *directory*.

    Args:
        directory: Root folder to walk. A folder that does not exist is
            silently ignored (``os.walk`` yields nothing for it).
        keep: Iterable of file paths that must never be removed.

    Removal is best-effort: a file that cannot be deleted is skipped so that
    one failure does not stop the rest of the cleanup.
    """
    protected = {os.path.abspath(p) for p in keep}
    for root, _subdirs, names in os.walk(directory):
        for name in names:
            # os.walk roots of sub-directories carry no trailing separator,
            # so the path has to be joined rather than concatenated.
            target = os.path.join(root, name)
            if os.path.abspath(target) in protected:
                continue
            try:
                os.remove(target)
            except OSError:
                pass  # best-effort cleanup: leave files we cannot delete


print('Reading data...')
if not os.path.exists(_):
    # Fail here with a clear message instead of a NameError on `data` below.
    raise FileNotFoundError("Input data file not found: {}".format(_))
data = pd.read_table(_)  # read the file as a pandas DataFrame

print('Clearing existing files...')
# NOTE(review): this pass wipes stale files under data/. The input file is
# excluded so the script does not delete its own input and stays re-runnable.
_clear_files(os.path.join(path_, "data"), keep=(_,))

# sequences and their class labels as plain lists
seq_list, cls_list = data['sequence'].tolist(), data['class'].tolist()

# The trailing slash is kept on purpose: other code appends file names to pth.
pth = path_ + "/output/"
os.makedirs(pth, exist_ok=True)

_clear_files(pth)  # start from empty output files, they are opened in append mode

# Load the pickled attribute names used as the header of the weka file.
# NOTE(review): pickle.load can execute arbitrary code; only load the
# attribute file shipped with the project, never one from an untrusted source.
with open(path_ + "/config/attrib", "rb") as attr_file:
    attr = pickle.load(attr_file)
with open(os.path.join(pth, "weka_output.arff"), "a+") as wk:
    wk.write("".join('{}\n'.format(x) for x in attr))
2327
def format_output(aa_count, cnt):
    """Append one sample's feature values to the svm, weka and csv outputs.

    Args:
        aa_count: Mapping whose values are the feature values to write, in
            iteration order.
        cnt: Class indicator. ``1`` marks a positive sample (svm label ``+1``,
            weka class ``serk``); any other value is written as a negative
            sample (svm label ``-1``, weka class ``loc``, csv class ``0``).

    Besides the arguments, the csv row reads the module-level values
    ``aromat``, ``fraction`` (first three items), ``iso``, ``mol_w`` and
    ``ins``, which must be set before this function is called. All three
    files are opened in append mode inside the module-level directory ``pth``.
    """
    values = list(aa_count.values())
    positive = cnt == 1

    svm_label = "+1" if positive else "-1"
    weka_class = "serk" if positive else "loc"
    csv_class = str(cnt) if positive else "0"

    # svm: "<label> <index>:<value> ...", indices counted from 0
    svm_features = ' '.join("{}:{}".format(i, v) for i, v in enumerate(values))
    # Both classes go to the same file inside the output directory.
    with open(os.path.join(pth, "svm_out.txt"), "a+") as s:
        s.write(svm_label + " " + svm_features + "\n")

    # weka: comma-terminated values followed by the class name
    with open(os.path.join(pth, "weka_output.arff"), "a+") as w:
        w.write(' '.join("{},".format(x) for x in values) + " " + weka_class + "\n")

    # csv: feature values, the physico-chemical descriptors, then the class
    extras = [
        str(round(aromat, 3)),
        str(round(fraction[0], 3)),
        str(round(fraction[1], 3)),
        str(round(fraction[2], 3)),
        str(round(iso, 3)),
        str(mol_w),
        str(ins),
        csv_class,
    ]
    # NOTE(review): "tain_DL.csv" looks like a typo for "train_DL.csv"; the
    # name is kept because downstream code may depend on it.
    with open(os.path.join(pth, "tain_DL.csv"), "a+") as DPL:
        DPL.write(''.join("{},".format(x) for x in values) + ",".join(extras) + "\n")
3438
@@ -39,3 +43,5 @@ def format_output(aa_count,cnt): # write the extracted feature values to arf
3943 try :mol_w , ins = ("%0.2f" % _ .molecular_weight ()),("%0.2f" % _ .instability_index ())
4044 except Exception :mol_w ,ins = mol_w ,ins # aromaticity, sec_strucure_fraction, iso_electric point , molecular weight, instability index
4145 format_output (aa_count ,cl )
# Tell the user the run finished and where the result files were written.
print("Feature extraction complete...")
print("Extracted features are saved in the output directory in .txt, .arff and .csv formats")
0 commit comments