diff --git a/CANDy v2.0.ipynb b/CANDy v2.0.ipynb index b0b83af..c0bf595 100644 --- a/CANDy v2.0.ipynb +++ b/CANDy v2.0.ipynb @@ -3890,7 +3890,7 @@ "id": "1fa04b37", "metadata": {}, "source": [ - "# MSA: MAFFT" + "# MSA: FAMSA" ] }, { @@ -3909,7 +3909,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "id": "9ceb6174", "metadata": {}, "outputs": [ @@ -8207,30 +8207,30 @@ } ], "source": [ - "#MAFFT\n", + "#FAMSA\n", "import subprocess\n", "from subprocess import Popen, PIPE\n", "\n", - "def MAFFT(execute = True):\n", - " \"\"\"When execute is True, this function runs the Clustal Omega executable to perform a MSA\"\"\"\n", + "def FAMSA(execute = True):\n", + " \"\"\"When execute is True, this function runs the FAMSA executable to perform MSA\"\"\"\n", " if execute: \n", " \n", - " #path to clustalo_exe\n", - " mafft = os.getcwd() + '/mafft' #r\"/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/mafft\"\n", + " #path to FAMSA executable\n", + " famsa = os.getcwd() + '/famsa'\n", " \n", " #name of the input file\n", - " in_file = os.path.join(jobname, f\"CAZy_{jobname}_inclchar_selected.fasta\") #'/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/CANDy/V1.0/GH57_All/GH57_MSA_Catdomains_75_cleaned_1.fasta' #s.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_inclchar_selected.fasta\")\n", + " in_file = os.path.join(jobname, f\"CAZy_{jobname}_inclchar_selected.fasta\")\n", "\n", " #name you want to give to the outputfile\n", - " out_file = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_aligned.fasta\")\n", + " out_file = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\")\n", + " \n", + " #perform the multiple sequence alignment using FAMSA\n", + " famsa_cmd = [famsa, \"-refine_mode\", \"on\", in_file, out_file]\n", + " p = subprocess.Popen(famsa_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n", + " stdout, stderr = p.communicate()\n", " \n", - " #perform the multiple sequence alignment using MAFFT\n", - " mafft_cmd = [\"mafft\", \"--auto\", in_file]\n", - " with open(out_file, \"w\") as outfile:\n", - " p = subprocess.Popen(mafft_cmd, stdout=outfile, stderr=subprocess.PIPE)\n", - " _, error_output = p.communicate()\n", " if p.returncode != 0:\n", - " raise RuntimeError(\"An error occurred while running MAFFT:\\n{}\".format(error_output.decode()))\n", + " raise RuntimeError(\"An error occurred while running FAMSA:\\n{}\".format(stderr.decode()))\n", " \n", " with open(out_file, 'r') as r:\n", " print(r.read())\n", @@ -8238,7 +8238,7 @@ " print('Finished!')\n", " return\n", "\n", - "alignment = MAFFT(execute)" + "alignment = FAMSA(execute)" ] }, { @@ -8265,7 +8265,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "id": "0cfcbecd", "metadata": {}, "outputs": [ @@ -8279,14 +8279,15 @@ ], "source": [ "from subprocess import Popen, PIPE\n", + "import os\n", "\n", "def FastTree(execute = True):\n", - " \"\"\"When execute is True, this function runs the FastTree executable to perform PTI\"\"\"\n", + " \"\"\"When execute is True, this function runs the VeryFastTree executable to perform PTI\"\"\"\n", " \n", " if execute:\n", " \n", - " #path to FastTree_exe\n", - " FastTree = os.getcwd() + '/FastTree' #'/Users/winde/Alex/Doctoraat/PyEED/Jupyter Notebooks/FastTree'\n", + " #path to VeryFastTree_exe\n", + " VeryFastTree = os.getcwd() + '/VeryFastTree'\n", " \n", " #name of the inputfile\n", " inputfile = os.path.join(jobname, f\"CAZy_{jobname}_aligned.fasta\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_{cutoff}_aligned.fasta\")\n", @@ -8294,11 +8295,15 @@ " #name of the outputfile\n", " outputfile = os.path.join(jobname, f\"CAZy_{jobname}_phyltree.nwk\") #os.path.join(jobname, f\"CAZy_{family}_{taxsubset}_phyltree.nwk\")\n", " \n", - " #build a phylogenetic tree using FastTree\n", - " fasttree_process = Popen([FastTree, \"-quiet\", \"-out\", outputfile, inputfile], stdout=PIPE, stderr=PIPE)\n", + " # Determine number of CPU cores available and use max - 2\n", + " max_cores = os.cpu_count() or 4 # Default to 4 if cpu_count returns None\n", + " num_threads = max(1, max_cores - 2) # Ensure at least 1 thread\n", + " \n", + " # Build a phylogenetic tree using VeryFastTree with multithreading\n", + " fasttree_process = Popen([VeryFastTree, \"-threads\", str(num_threads), \"-quiet\", \"-out\", outputfile, inputfile], stdout=PIPE, stderr=PIPE)\n", " stdout, stderr = fasttree_process.communicate()\n", " \n", - " print(f\"{outputfile} finished!\")\n", + " print(f\"{outputfile} finished using {num_threads} threads!\")\n", " \n", " return \n", "\n",