Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions CANDy v2.0.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3890,7 +3890,7 @@
"id": "1fa04b37",
"metadata": {},
"source": [
"# MSA: MAFFT"
"# MSA: FAMSA"
]
},
{
Expand All @@ -3909,7 +3909,7 @@
},
{
"cell_type": "code",
"execution_count": 79,
"execution_count": null,
"id": "9ceb6174",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -8207,38 +8207,38 @@
}
],
"source": [
"#MAFFT\n",
"#FAMSA\n",
"import subprocess\n",
"from subprocess import Popen, PIPE\n",
"\n",
"def MAFFT(execute = True):\n",
" \"\"\"When execute is True, this function runs the Clustal Omega executable to perform a MSA\"\"\"\n",
"def FAMSA(execute = True):\n",
" \"\"\"When execute is True, this function runs the FAMSA executable to perform MSA\"\"\"\n",
" if execute: \n",
" \n",
" #path to clustalo_exe\n",
" mafft = os.getcwd() + '/mafft' #r\"/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/mafft\"\n",
" #path to FAMSA executable\n",
" famsa = os.getcwd() + '/famsa'\n",
" \n",
" #name of the input file\n",
" in_file = os.path.join(jobname, f\"CAZy_{jobname}_inclchar_selected.fasta\") #'/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/CANDy/V1.0/GH57_All/GH57_MSA_Catdomains_75_cleaned_1.fasta' #s.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_inclchar_selected.fasta\")\n",
" in_file = os.path.join(jobname, f\"CAZy_{jobname}_inclchar_selected.fasta\")\n",
"\n",
" #name you want to give to the outputfile\n",
" out_file = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_aligned.fasta\")\n",
" out_file = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\")\n",
" \n",
" #perform the multiple sequence alignment using FAMSA\n",
" famsa_cmd = [famsa, \"-refine_mode\", \"on\", in_file, out_file]\n",
" p = subprocess.Popen(famsa_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
" stdout, stderr = p.communicate()\n",
" \n",
" #perform the multiple sequence alignment using MAFFT\n",
" mafft_cmd = [\"mafft\", \"--auto\", in_file]\n",
" with open(out_file, \"w\") as outfile:\n",
" p = subprocess.Popen(mafft_cmd, stdout=outfile, stderr=subprocess.PIPE)\n",
" _, error_output = p.communicate()\n",
" if p.returncode != 0:\n",
" raise RuntimeError(\"An error occurred while running MAFFT:\\n{}\".format(error_output.decode()))\n",
" raise RuntimeError(\"An error occurred while running FAMSA:\\n{}\".format(stderr.decode()))\n",
" \n",
" with open(out_file, 'r') as r:\n",
" print(r.read())\n",
"\n",
" print('Finished!')\n",
" return\n",
"\n",
"alignment = MAFFT(execute)"
"alignment = FAMSA(execute)"
]
},
{
Expand All @@ -8265,7 +8265,7 @@
},
{
"cell_type": "code",
"execution_count": 81,
"execution_count": null,
"id": "0cfcbecd",
"metadata": {},
"outputs": [
Expand All @@ -8279,26 +8279,31 @@
],
"source": [
"from subprocess import Popen, PIPE\n",
"import os\n",
"\n",
"def FastTree(execute = True):\n",
" \"\"\"When execute is True, this function runs the FastTree executable to perform PTI\"\"\"\n",
" \"\"\"When execute is True, this function runs the VeryFastTree executable to perform PTI\"\"\"\n",
" \n",
" if execute:\n",
" \n",
" #path to FastTree_exe\n",
" FastTree = os.getcwd() + '/FastTree' #'/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/FastTree'\n",
" #path to VeryFastTree_exe\n",
" VeryFastTree = os.getcwd() + '/VeryFastTree'\n",
" \n",
" #name of the inputfile\n",
" inputfile = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_aligned.fasta\")\n",
" \n",
" #name of the outputfile\n",
" outputfile = os.path.join(jobname, f\"CAZy_{jobname}_phyltree.nwk\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_phyltree.nwk\")\n",
" \n",
" #build a phylogenetic tree using FastTree\n",
" fasttree_process = Popen([FastTree, \"-quiet\", \"-out\", outputfile, inputfile], stdout=PIPE, stderr=PIPE)\n",
" # Determine number of CPU cores available and use max - 2\n",
" max_cores = os.cpu_count() or 4 # Default to 4 if cpu_count returns None\n",
" num_threads = max(1, max_cores - 2) # Ensure at least 1 thread\n",
" \n",
" # Build a phylogenetic tree using VeryFastTree with multithreading\n",
" fasttree_process = Popen([VeryFastTree, \"-threads\", str(num_threads), \"-quiet\", \"-out\", outputfile, inputfile], stdout=PIPE, stderr=PIPE)\n",
" stdout, stderr = fasttree_process.communicate()\n",
" \n",
" print(f\"{outputfile} finished!\")\n",
" print(f\"{outputfile} finished using {num_threads} threads!\")\n",
" \n",
" return \n",
"\n",
Expand Down