diff --git a/dnase/trackhub/MakeTrackhub.py b/dnase/trackhub/MakeTrackhub.py index 6af24685..734c5f3b 100755 --- a/dnase/trackhub/MakeTrackhub.py +++ b/dnase/trackhub/MakeTrackhub.py @@ -312,9 +312,12 @@ def shortest_unique_strings(array, minlength=1): # So sampleName_trackname is increased by 4+8+9=21 characters. # If sampleName_trackname starts with 107 characters, then 128 characters get sent to the server. This causes an error. # So sampleName_trackname needs to be 106 characters or less. - sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID']) - - + if args.supertrack == "By_Locus": + # curGroup does not contain the flowcell ID here. + sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['FlowCellID'] + "_" + curSample['SampleID']) + else: + sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID']) + # Make sure there are no duplicate track names. if sampleName_trackname in sampleName_dict: if args.verbose: @@ -411,7 +414,7 @@ def shortest_unique_strings(array, minlength=1): visibility="full", parentonoff=DensCovTracksDefaultDisplayMode, tracktype="bigWig", - viewLimits="0:500", + viewLimits="0:500", #Keep high since it becomes a hard limit in the UI autoScale='on', alwaysZero='on', maxHeightPixels="100:30:10", diff --git a/dnase/trackhub/samplesforTrackhub.R b/dnase/trackhub/samplesforTrackhub.R index 9d29236c..50490296 100755 --- a/dnase/trackhub/samplesforTrackhub.R +++ b/dnase/trackhub/samplesforTrackhub.R @@ -166,11 +166,12 @@ colorAssignments <- NULL # Initialize "data" with just column names. We'll be adding rows to this later on in the code. -outputCols <- c("Name", "SampleID", "Assay", "Group", "filebase", "Mapped_Genome", "Annotation_Genome", "Color", "analyzed_reads", "Genomic_coverage", "SPOT", "Num_hotspots", "Exclude", "Age", "Institution", "Replicate", "Bait_set", "Genetic_Modification") +outputCols <- c("Name", "SampleID", "Assay", "Group", "filebase", "Mapped_Genome", "Annotation_Genome", "Color", "analyzed_reads", "Genomic_coverage", "SPOT", "Num_hotspots", "Exclude", "Age", "Institution", "Replicate", "Bait_set", "Genetic_Modification", "FlowCellID") if(opt$project == "CEGS_byLocus") { outputCols <- c(outputCols, "Study", "Project", "Assembly", "Type") } +data_keys <- vector() data <- data.frame(matrix(ncol=length(outputCols), nrow=1)) colnames(data) <- outputCols i <- 0 # This will be our "data" output variable index. @@ -421,6 +422,20 @@ for(curdir in mappeddirs) { } data$filebase[i] <- paste0(curdir, "/", paste0(unlist(strsplit(basename(analysisFile), "\\."))[2:3], collapse=".")) + + # Check for duplicate analysisFiles, possibly left over from a previous run. + # Note: "data$Group" does not contain flowcell ID info when opt$project=CEGS_byLocus, so we need to get it from curdir. + # FlowCellID will also be used in MakeTrackhub.py to create unique byLocus tracknames. + data$FlowCellID[i] <- strsplit(curdir, "/", fixed=TRUE)[[1]][1] + data_key <- paste(data$SampleID[i], data$Mapped_Genome[i], data$FlowCellID[i], sep="") + if(i > 1) { + if(data_key %in% data_keys){ + message("[samplesforTrackhub] ", "WARNING Possible duplicate analysis files found in ", curdir) + msg <- paste("See: SampleID:", data$SampleID[i], "Mapped_Genome:", data$Mapped_Genome[i], "FlowCellID:", data$FlowCellID[i], sep=" ") + message("[samplesforTrackhub] ", msg) + } + } + data_keys[i] <- data_key } }