-
Notifications
You must be signed in to change notification settings - Fork 1
Contamination removal, fastq preprocessing #20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
910ffa0
dd9547a
ee7d65d
fc11717
07404fb
45917b0
8a7ebf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -229,6 +229,32 @@ def download_deepvariant_model_files(urls: list, outfolder: str): | |
| return destpath | ||
|
|
||
|
|
||
| def download_sortmerna_db(url, keep_file, outfolder): | ||
| """ | ||
| Download SortMeRNA database, extract only the needed file, and cleanup. | ||
| """ | ||
| dest_file = os.path.join(outfolder, keep_file) | ||
| if os.path.isfile(dest_file): | ||
| logging.info(f"{keep_file} already exists. Skipping.") | ||
| return dest_file | ||
|
|
||
| tar_filename = url.split("/")[-1] | ||
| tar_path = os.path.join(outfolder, tar_filename) | ||
|
|
||
| logging.info(f"Downloading SortMeRNA database from {url}") | ||
| if not os.path.isfile(tar_path): | ||
| run_command(["wget", "-c", url, "-P", outfolder]) | ||
|
|
||
| logging.info(f"Extracting {keep_file} from archive") | ||
| run_command(["tar", "-xzf", tar_path, "-C", outfolder, f"--wildcards", f"*/{keep_file}", "--strip-components=1"]) | ||
|
|
||
| logging.info("Cleaning up archive") | ||
| if os.path.isfile(tar_path): | ||
| os.remove(tar_path) | ||
|
|
||
| return dest_file | ||
|
Comment on lines
+232
to
+255
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Honor dry-run and verify that the expected file was extracted. The new SortMeRNA downloader ignores dry-run and may update config even if extraction didn’t actually produce `keep_file`.
🛠️ Suggested fix
-def download_sortmerna_db(url, keep_file, outfolder):
+def download_sortmerna_db(url, keep_file, outfolder, dry=False):
@@
- logging.info(f"Downloading SortMeRNA database from {url}")
- if not os.path.isfile(tar_path):
- run_command(["wget", "-c", url, "-P", outfolder])
+ logging.info(f"Downloading SortMeRNA database from {url}")
+ if dry:
+ logging.info("Dry-run enabled; skipping download/extraction.")
+ return dest_file
+ if not os.path.isfile(tar_path):
+ run_command(["wget", "-c", url, "-P", outfolder])
@@
- run_command(["tar", "-xzf", tar_path, "-C", outfolder, f"--wildcards", f"*/{keep_file}", "--strip-components=1"])
+ run_command(["tar", "-xzf", tar_path, "-C", outfolder, "--wildcards", f"*/{keep_file}", "--strip-components=1"])
+ if not os.path.isfile(dest_file):
+ raise FileNotFoundError(f"Expected {dest_file} after extraction")
@@
- elif ftype == "sortmerna":
- path = download_sortmerna_db(res_entry['url'], res_entry['keep_file'], outfolder)
+ elif ftype == "sortmerna":
+ path = download_sortmerna_db(res_entry['url'], res_entry['keep_file'], outfolder, args.dry_run)
Also applies to: 314-315
🧰 Tools
🪛 Ruff (0.15.2)
[error] 249-249: f-string without any placeholders. Remove extraneous `f` prefix (F541)
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def convert_REDI(bed_url, bed_output, drop_intermediate=True): | ||
| if os.path.isfile(bed_output): | ||
| logging.info(f"{bed_output} already exists.") | ||
|
|
@@ -285,6 +311,8 @@ def main(args): | |
| path = decompress_file(download_resource(res_entry, outfolder, args.dry_run)) | ||
| elif ftype == "model": | ||
| path = download_deepvariant_model_files(res_entry['url'], outfolder) | ||
| elif ftype == "sortmerna": | ||
| path = download_sortmerna_db(res_entry['url'], res_entry['keep_file'], outfolder) | ||
| else: | ||
| logging.warning(f"Unknown filetype for {name} as its {ftype}. Skipping.") | ||
| continue | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,7 +62,7 @@ rule genotype: | |
| threads: config["params"]["t1k"]["threads"] | ||
| resources: | ||
| runtime="240m", | ||
| ncpus=4, | ||
| ncpus=1, | ||
| mem="32G", | ||
|
Comment on lines
62
to
66
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: find . -name "config_main.yaml" -o -name "HLA_typing.smk" | head -20Repository: ctglab/ENEO Length of output: 112 🏁 Script executed: rg -n "t1k" --type yamlRepository: ctglab/ENEO Length of output: 389 🏁 Script executed: # Locate and read the HLA_typing.smk file
find . -name "HLA_typing.smk" -exec wc -l {} +Repository: ctglab/ENEO Length of output: 90 🏁 Script executed: # Extract t1k configuration from config_main.yaml
rg -A 10 "t1k:" config/config_main.yamlRepository: ctglab/ENEO Length of output: 619 🏁 Script executed: # Read the HLA_typing.smk file to see context around lines 62-66
cat -n workflow/rules/HLA_typing.smk | sed -n '50,80p'Repository: ctglab/ENEO Length of output: 1117 🏁 Script executed: # Check the extract_hla rule to see how it handles resources
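cat -n workflow/rules/HLA_typing.smk | sed -n '79,108p'
Repository: ctglab/ENEO Length of output: 1082
Align `ncpus` with `threads` in the genotype rule. The genotype rule declares `threads: config["params"]["t1k"]["threads"]` but now requests only `ncpus=1`. The extract_hla rule is correctly configured with `ncpus=1`.
🤖 Prompt for AI Agents |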
||
| log: | ||
| os.path.join( | ||
|
|
@@ -102,7 +102,7 @@ rule extract_hla: | |
| ), | ||
| resources: | ||
| runtime="20m", | ||
| ncpus=2, | ||
| ncpus=1, | ||
| mem="8G", | ||
| shell: | ||
| "python3 {input.hla_script} {input.genotype} > {output}" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: ctglab/ENEO
Length of output: 176
🏁 Script executed:
Repository: ctglab/ENEO
Length of output: 230
Remove the unnecessary f-string to satisfy Ruff F541.
`f"--wildcards"` on line 249 has no placeholders and triggers a lint error. The adjacent `f"*/{keep_file}"` should remain since it contains a placeholder.
🧹 Minimal fix
🧰 Tools
🪛 Ruff (0.15.2)
[error] 249-249: f-string without any placeholders
Remove extraneous `f` prefix
🤖 Prompt for AI Agents