mauranolab · cadley-nyulangone · Mar 26, 2020 · Mar 26, 2020 · Mar 26, 2020 · Mar 30, 2020
diff --git a/dnase/trackhub/MakeTrackhub.py b/dnase/trackhub/MakeTrackhub.py
@@ -312,9 +312,12 @@ def shortest_unique_strings(array, minlength=1):
             # So sampleName_trackname is increased by 4+8+9=21 characters.
             # If sampleName_trackname starts with 107 characters, then 128 characters get sent to the server.  This causes an error.
             # So sampleName_trackname needs to be 106 characters or less. 
-            sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID'])
-
-
+            if args.supertrack == "By_Locus":
+                # curGroup does not contain the flowcell ID here, so add FlowCellID explicitly to keep By_Locus track names unique.
+                sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['FlowCellID'] + "_" + curSample['SampleID'])
+            else:
+                sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID'])
+
             # Make sure there are no duplicate track names.
             if sampleName_trackname in sampleName_dict:
                 if args.verbose:
@@ -411,7 +414,7 @@ def shortest_unique_strings(array, minlength=1):
                             visibility="full",
                             parentonoff=DensCovTracksDefaultDisplayMode,
                             tracktype="bigWig",
-                            viewLimits="0:500",
+                            viewLimits="0:500", #Keep high since it becomes a hard limit in the UI
                             autoScale='on',
                             alwaysZero='on',
                             maxHeightPixels="100:30:10",

diff --git a/dnase/trackhub/samplesforTrackhub.R b/dnase/trackhub/samplesforTrackhub.R
@@ -166,11 +166,12 @@ colorAssignments <- NULL
 
 
 # Initialize "data" with just column names.  We'll be adding rows to this later on in the code.
-outputCols <- c("Name", "SampleID", "Assay", "Group", "filebase", "Mapped_Genome", "Annotation_Genome", "Color", "analyzed_reads", "Genomic_coverage", "SPOT", "Num_hotspots", "Exclude", "Age", "Institution", "Replicate", "Bait_set", "Genetic_Modification")
+outputCols <- c("Name", "SampleID", "Assay", "Group", "filebase", "Mapped_Genome", "Annotation_Genome", "Color", "analyzed_reads", "Genomic_coverage", "SPOT", "Num_hotspots", "Exclude", "Age", "Institution", "Replicate", "Bait_set", "Genetic_Modification", "FlowCellID")
 if(opt$project == "CEGS_byLocus") {
     outputCols <- c(outputCols, "Study", "Project", "Assembly", "Type")
 }
 
+data_keys <- vector()
 data <- data.frame(matrix(ncol=length(outputCols), nrow=1))
 colnames(data) <- outputCols
 i <- 0 # This will be our "data" output variable index.
@@ -421,6 +422,20 @@ for(curdir in mappeddirs) {
 		}
 
 		data$filebase[i] <- paste0(curdir, "/", paste0(unlist(strsplit(basename(analysisFile), "\\."))[2:3], collapse="."))
+
+		# Check for duplicate analysisFiles, possibly left over from a previous run.
+		# Note:  "data$Group" does not contain flowcell ID info when opt$project=CEGS_byLocus, so we need to get it from curdir.
+		#         FlowCellID will also be used in MakeTrackhub.py to create unique byLocus tracknames.
+		data$FlowCellID[i] <- strsplit(curdir, "/", fixed=TRUE)[[1]][1]
+		# Join the key fields with a tab so that distinct (SampleID, Mapped_Genome, FlowCellID) triples can never collapse into the same string.
+		data_key <- paste(data$SampleID[i], data$Mapped_Genome[i], data$FlowCellID[i], sep="\t")
+		# data_keys holds the keys recorded for earlier rows; warn (but keep going) if this key was already seen.
+		if(i > 1 && data_key %in% data_keys) {
+			message("[samplesforTrackhub] ", "WARNING Possible duplicate analysis files found in ", curdir)
+			msg <- paste("See:    SampleID:", data$SampleID[i], "Mapped_Genome:", data$Mapped_Genome[i], "FlowCellID:", data$FlowCellID[i], sep=" ")
+			message("[samplesforTrackhub] ", msg)
+		}
+		data_keys[i] <- data_key
 	}
 }