From 469bdda132175d86feebf6f3bc5895d7fd938690 Mon Sep 17 00:00:00 2001 From: NoahHenrikKleinschmidt Date: Mon, 28 Apr 2025 10:50:37 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9E=20fix:=20TSNE=20now=20encodes=20qu?= =?UTF-8?q?ery=20sequence=20correctly=20+++=20fixed=20types=20in=20CLI=20f?= =?UTF-8?q?or=20min=5Feps,=20max=5Feps,=20eps=5Fstep,=20and=20min=5Fsample?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/ClusterMSA.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/ClusterMSA.py b/scripts/ClusterMSA.py index 6b688b7..5a4bcec 100644 --- a/scripts/ClusterMSA.py +++ b/scripts/ClusterMSA.py @@ -50,10 +50,10 @@ def plot_landscape(x, y, df, query_, plot_type): p.add_argument('--eps_val', action='store', type=float, help="Use single value for eps instead of scanning.") p.add_argument('--resample', action='store_true', help='If included, will resample the original MSA with replacement before writing.') p.add_argument("--gap_cutoff", action='store', type=float, default=0.25, help='Remove sequences with gaps representing more than this frac of seq.') - p.add_argument('--min_eps', action='store',default=3, help='Min epsilon value to scan for DBSCAN (Default 3).') - p.add_argument('--max_eps', action='store',default=20, help='Max epsilon value to scan for DBSCAN (Default 20).') - p.add_argument('--eps_step', action='store',default=.5, help='step for epsilon scan for DBSCAN (Default 0.5).') - p.add_argument('--min_samples', action='store',default=3, help='Default min_samples for DBSCAN (Default 3, recommended no lower than that).') + p.add_argument('--min_eps', action='store',default=3, type=int, help='Min epsilon value to scan for DBSCAN (Default 3).') + p.add_argument('--max_eps', action='store',default=20, type=int, help='Max epsilon value to scan for DBSCAN (Default 20).') + p.add_argument('--eps_step', action='store',default=.5, type=float, help='step for epsilon scan for DBSCAN (Default 0.5).') + p.add_argument('--min_samples', action='store',default=3, type=int, help='Default min_samples for DBSCAN (Default 3, recommended no lower than that).') p.add_argument('--run_PCA', action='store_true', help='Run PCA on one-hot embedding of sequences and store in output_cluster_metadata.tsv') p.add_argument('--run_TSNE', action='store_true', help='Run TSNE on one-hot embedding of sequences and store in output_cluster_metadata.tsv') @@ -197,7 +197,7 @@ def plot_landscape(x, y, df, query_, plot_type): if args.run_TSNE: lprint('Running TSNE ...',f) - ohe_vecs = encode_seqs(df.sequence.tolist()+[query_.sequence.tolist()], max_len=L) + ohe_vecs = encode_seqs(df.sequence.tolist()+query_.sequence.tolist(), max_len=L) # different than PCA because tSNE doesn't have .transform attribute mdl = TSNE()