Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions examples/test.clustal
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
CLUSTAL O(1.2.4) multiple sequence alignment


mouse MMPPWLLARTLIMALFFSCLTPGSLNPCIEVVPNITYQCMDQKLSKVPDDIPSSTKNIDL
opossum -MVPWMVMALIMATTFFSCPDPGSCHSCLEVFPSIEYQCMDQNFSKVPMDIPASTQKLDL
* **:: :: : **** *** : *:**.*.* ******::**** ***:**:::**

mouse SFNPLKILKSYSFSNFSELQWLDLSRCEIETIEDKAWHGLHHLSNLILTGNPIQSFSPGS
opossum SFNPLRSLAPRSFSRITELRILDLSRCDIQKIEDDTYEGLHNLSTLILTGNPIQSLGIRA
*****: * ***.::**: ******:*:.***.::.***:**.**********:. :

mouse FSGLTSLENLVAVETKLASLESFPIGQLITLKKLNVAHNFIHSCKLPAYFSNLTNLVHVD
opossum FYGLPKLQKLVVVETNLMSLEDFPIGHIITLQELHLGHNSISSLQLPSYFASFSSMKHLD
* ** .*::**.***:* ***.****::***::*::.** * * :**:**:.::.: *:*

mouse LSYNYIQTITVNDLQFLRENPQVNLSLDMSLNPIDFIQDQAFQGIKLHELTLRGNFNSSN
opossum LHLNQIKNISDGDLEPVKLNPN--LTLDLSENPIQYIHPGAFKGIHLSGLKLRSSFHNST
* * *:.*: .**: :: **: *:**:* ***::*: **:**:* *.**..*:.*.

mouse IMKTCLQNLAGLHVHRLILGEFKDERNLEIFEPSIMEGLCDVTIDEFRLTYTNDFSDD-I
opossum IMEACIKGMTGLKVGKLVLGNYKNDIQMTSFESSILDGLCEVDIGEFRMVQPSQFPSDGN
**::*::.::**:* :*:**::*:: :: ** **::***:* *.***:. .:* .*

mouse VKFHCLANVSAMSLAGVSIKYLEDVPKHFKWQSLSIIRCQLKQ---FPTLDLPFLKSLTL
opossum GVFKCLVNVSVLFLVNTDIDQLTNLPIFPKLFSLSLTDCNFDIMPDLPSISLPSLKELKV
*:**.***.: *....*. * ::* . * ***: *::. :*::.** **.*.:

mouse TMNKGSISFKKVALPSLSYLDLSRNALSFSGCCSYSDLGTNSLRHLDLSFNGAIIMSANF
opossum THHKDLTTLSDLALPNLQILDLSKNSLNVLSCCSDKTFNTPNLKYLNLSYNAHISLSKNF
* :*. ::..:***.*. ****:*:*.. .*** . :.* .*::*:**:*. * :* **

mouse MGLEELQHLDFQHSTLKRVTEFSAFLSLEKLLYLDISYTNTKIDFDGIFLGLTSLNTLKM
opossum MGLENLESLDLQHTILKDHDKYPAFYSLINLRYLDISYTNTHVKFKNIFTGMSNLKHLKM
****:*: **:**: ** :: ** ** :* *********::.*..** *::.*: ***

mouse AGNSFKDNTLSNVFANTTNLTFLDLSKCQLEQISWGVFDTLHRLQLLNMSHNNLLFLDSS
opossum AGSSFQDNILHDIFKNLTKLVSLNISYCQLERVSQETMSPLHQLQVLDLSHNKLQTFDPF
**.**:** * ::* * *:*. *::* ****::* .:. **:**:*::***:* :*

mouse HYNQLYSLSTLDCSFNRIETSKG-ILQHFPKSLAFFNLTNNSVACICEHQKFLQWVKEQK
opossum ICMPLLNLQVLNCSSNSITALNGENLQELSRTLISIDLSDNPFDCVCDHQIFFQWVKEHT
* .*..*:** * * : :* **.: ::* ::*::* . *:*:** *:*****:.

mouse QFLVNVEQMTCATPVEMNTSLVLDFNNSTCYMYKTIISVSVVSVIVVSTVAFLIYHFYFH
opossum HLLNRSKPMTCQTPSHMKDISVQLFDDSTCHMKKTVIAVSLLGIIVILLVLALVYKFYFH
::* . : *** ** .*: * *::***:* **:*:**::.:**: * *:*:****

mouse LILIAGCKKYSRGESIYDAFVIYSSQNEDWVRNELVKNLEEGVPRFHLCLHYRDFIPGVA
opossum LMLLAGCKNIGGGESIYDAFVIYSSQDEDWVRKELVKNLEEGVPSFQLCLHYRDFIPGVA
*:*:****: . **************:*****:*********** *:*************

mouse IAANIIQEGFHKSRKVIVVVSRHFIQSRWCIFEYEIAQTWQFLSSRSGIIFIVLEKVEKS
opossum IAANIIQEGFHKSRKVIVVISEHFIQSRWCKFEYEIAQTWQFLSSQAGIIFIILQKVEKS
*******************:*.******** **************::*****:*:*****

mouse LLRQQVELYRLLSRNTYLEWEDNPLGRHIFWRRLKNALLDGKASNPEQTAEEEQETATWT
opossum LLRQQMELYRLLNRNTYLEWEDTNLGRHVFWRRLRKALLDGRTKNPKGAAEEEFSQPVSP
*****:******.*********. ****:*****::*****::.**: :**** . .

mouse -
opossum T

CLUSTAL O(1.2.4) multiple sequence alignment


mouse MLPFILFSTLLSPILTESEKQQWFCNSSDAIISYSYCDHLKFPISISSEPCIRLRGTNGF
opossum MLQIVFFSILFTFTFTESMKNDWTCNSQDAEITYSSCDAKKPIPIININPCLSWRKTRGN
** :::** *:: :*** *::* ***.** *:** ** * *. :**: * *.*

mouse VHVEFIPRGNLKYLYFNLFISVN-SIELPKRKEVLCHGHDDDYSFCRALKGETVNTSIPF
opossum LSFYYVPRKDMKELYFNVHMELRSAIIVPKRKEVICRGVDDKYSFCRVLKGETINTTVPF
: . ::** ::* ****:.:.:. :* :******:*:* **.*****.*****:**::**

mouse SFEGILFPKGHYRCVAEAIAGDTEEKLFCLNFTIIHRRDVN
opossum SYSSLKFPKGLYIFIAEAFSGSTEDSMFCCNITLKLK----
*:..: **** * :***::*.**:.:** *:*: :
272 changes: 272 additions & 0 deletions examples/testing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import phylopandas as phypd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"seq_df = phypd.read_fasta(\"test.fasta\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>description</th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>sequence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5...</td>\n",
" <td>gi|2765658|emb|Z78533.1|CIZ78533</td>\n",
" <td>gi|2765658|emb|Z78533.1|CIZ78533</td>\n",
" <td>CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>gi|2765657|emb|Z78532.1|CCZ78532 C.californicu...</td>\n",
" <td>gi|2765657|emb|Z78532.1|CCZ78532</td>\n",
" <td>gi|2765657|emb|Z78532.1|CCZ78532</td>\n",
" <td>CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatu...</td>\n",
" <td>gi|2765656|emb|Z78531.1|CFZ78531</td>\n",
" <td>gi|2765656|emb|Z78531.1|CFZ78531</td>\n",
" <td>CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>gi|2765655|emb|Z78530.1|CMZ78530 C.margaritace...</td>\n",
" <td>gi|2765655|emb|Z78530.1|CMZ78530</td>\n",
" <td>gi|2765655|emb|Z78530.1|CMZ78530</td>\n",
" <td>CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangens...</td>\n",
" <td>gi|2765654|emb|Z78529.1|CLZ78529</td>\n",
" <td>gi|2765654|emb|Z78529.1|CLZ78529</td>\n",
" <td>ACGGCGAGCTGCCGAAGGACATTGTTGAGACAGCAGAATATACGAT...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" description \\\n",
"0 gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5... \n",
"1 gi|2765657|emb|Z78532.1|CCZ78532 C.californicu... \n",
"2 gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatu... \n",
"3 gi|2765655|emb|Z78530.1|CMZ78530 C.margaritace... \n",
"4 gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangens... \n",
"\n",
" id name \\\n",
"0 gi|2765658|emb|Z78533.1|CIZ78533 gi|2765658|emb|Z78533.1|CIZ78533 \n",
"1 gi|2765657|emb|Z78532.1|CCZ78532 gi|2765657|emb|Z78532.1|CCZ78532 \n",
"2 gi|2765656|emb|Z78531.1|CFZ78531 gi|2765656|emb|Z78531.1|CFZ78531 \n",
"3 gi|2765655|emb|Z78530.1|CMZ78530 gi|2765655|emb|Z78530.1|CMZ78530 \n",
"4 gi|2765654|emb|Z78529.1|CLZ78529 gi|2765654|emb|Z78529.1|CLZ78529 \n",
"\n",
" sequence \n",
"0 CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGA... \n",
"1 CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA... \n",
"2 CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA... \n",
"3 CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGA... \n",
"4 ACGGCGAGCTGCCGAAGGACATTGTTGAGACAGCAGAATATACGAT... "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seq_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"align_df = phypd.read_clustal(\"test.clustal\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>description</th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>sequence</th>\n",
" <th>star</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">0</th>\n",
" <th>0</th>\n",
" <td>mouse</td>\n",
" <td>mouse</td>\n",
" <td>&lt;unknown name&gt;</td>\n",
" <td>MMPPWLLARTLIMALFFSCLTPGSLNPCIEVVPNITYQCMDQKLSK...</td>\n",
" <td>* **:: :: : **** *** : *:**.*.* ******::**...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>opossum</td>\n",
" <td>opossum</td>\n",
" <td>&lt;unknown name&gt;</td>\n",
" <td>-MVPWMVMALIMATTFFSCPDPGSCHSCLEVFPSIEYQCMDQNFSK...</td>\n",
" <td>* **:: :: : **** *** : *:**.*.* ******::**...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">1</th>\n",
" <th>0</th>\n",
" <td>mouse</td>\n",
" <td>mouse</td>\n",
" <td>&lt;unknown name&gt;</td>\n",
" <td>MLPFILFSTLLSPILTESEKQQWFCNSSDAIISYSYCDHLKFPISI...</td>\n",
" <td>** :::** *:: :*** *::* ***.** *:** ** * *...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>opossum</td>\n",
" <td>opossum</td>\n",
" <td>&lt;unknown name&gt;</td>\n",
" <td>MLQIVFFSILFTFTFTESMKNDWTCNSQDAEITYSSCDAKKPIPII...</td>\n",
" <td>** :::** *:: :*** *::* ***.** *:** ** * *...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" description id name \\\n",
"0 0 mouse mouse <unknown name> \n",
" 1 opossum opossum <unknown name> \n",
"1 0 mouse mouse <unknown name> \n",
" 1 opossum opossum <unknown name> \n",
"\n",
" sequence \\\n",
"0 0 MMPPWLLARTLIMALFFSCLTPGSLNPCIEVVPNITYQCMDQKLSK... \n",
" 1 -MVPWMVMALIMATTFFSCPDPGSCHSCLEVFPSIEYQCMDQNFSK... \n",
"1 0 MLPFILFSTLLSPILTESEKQQWFCNSSDAIISYSYCDHLKFPISI... \n",
" 1 MLQIVFFSILFTFTFTESMKNDWTCNSQDAEITYSSCDAKKPIPII... \n",
"\n",
" star \n",
"0 0 * **:: :: : **** *** : *:**.*.* ******::**... \n",
" 1 * **:: :: : **** *** : *:**.*.* ******::**... \n",
"1 0 ** :::** *:: :*** *::* ***.** *:** ** * *... \n",
" 1 ** :::** *:: :*** *::* ***.** *:** ** * *... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"align_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It might be nice to be able to name the alignments instead of having each alignment multi-indexed by a number, but maybe that's not a huge issue."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading