Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions dnase/trackhub/MakeTrackhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,12 @@ def shortest_unique_strings(array, minlength=1):
# So sampleName_trackname is increased by 4+8+9=21 characters.
# If sampleName_trackname is 107 characters long, then 128 characters get sent to the server. This causes an error.
# So sampleName_trackname needs to be 106 characters or less.
sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID'])


if args.supertrack == "By_Locus":
# curGroup does not contain the flowcell ID here.
sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['FlowCellID'] + "_" + curSample['SampleID'])
else:
sampleName_trackname = cleanTrackName(sampleNameGenome + "_" + curGroup + "_" + curSample['SampleID'])

# Make sure there are no duplicate track names.
if sampleName_trackname in sampleName_dict:
if args.verbose:
Expand Down Expand Up @@ -411,7 +414,7 @@ def shortest_unique_strings(array, minlength=1):
visibility="full",
parentonoff=DensCovTracksDefaultDisplayMode,
tracktype="bigWig",
viewLimits="0:500",
viewLimits="0:500", #Keep high since it becomes a hard limit in the UI
autoScale='on',
alwaysZero='on',
maxHeightPixels="100:30:10",
Expand Down
17 changes: 16 additions & 1 deletion dnase/trackhub/samplesforTrackhub.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,12 @@ colorAssignments <- NULL


# Initialize "data" with just column names. We'll be adding rows to this later on in the code.
# "FlowCellID" is part of the base column set; the vector is defined exactly once.
outputCols <- c(
  "Name", "SampleID", "Assay", "Group", "filebase", "Mapped_Genome",
  "Annotation_Genome", "Color", "analyzed_reads", "Genomic_coverage", "SPOT",
  "Num_hotspots", "Exclude", "Age", "Institution", "Replicate", "Bait_set",
  "Genetic_Modification", "FlowCellID"
)
# CEGS_byLocus projects get four extra columns appended after the base set.
if(opt$project == "CEGS_byLocus") {
  outputCols <- c(outputCols, "Study", "Project", "Assembly", "Type")
}

# Per-row keys recorded as rows are filled in; starts empty and holds strings.
data_keys <- character()
# One all-NA placeholder row with one column per entry of outputCols.
data <- data.frame(matrix(ncol=length(outputCols), nrow=1))
colnames(data) <- outputCols
i <- 0 # This will be our "data" output variable index.
Expand Down Expand Up @@ -421,6 +422,20 @@ for(curdir in mappeddirs) {
}

data$filebase[i] <- paste0(curdir, "/", paste0(unlist(strsplit(basename(analysisFile), "\\."))[2:3], collapse="."))

# Check for duplicate analysisFiles, possibly left over from a previous run.
# Note: "data$Group" does not contain flowcell ID info when opt$project=CEGS_byLocus, so we need to get it from curdir.
# FlowCellID will also be used in MakeTrackhub.py to create unique byLocus tracknames.
# The flowcell ID is taken to be the first "/"-separated component of curdir.
data$FlowCellID[i] <- strsplit(curdir, "/", fixed=TRUE)[[1]][1]

# Join the key fields with a tab so adjacent fields cannot run together:
# with an empty separator, different (SampleID, Mapped_Genome, FlowCellID)
# triples could collapse to the same string and raise a false warning.
# NOTE(review): assumes none of the three fields contains a tab -- confirm.
data_key <- paste(data$SampleID[i], data$Mapped_Genome[i], data$FlowCellID[i], sep="\t")

# Only warn; the row is kept either way. Nothing is compared for the first row.
if(i > 1 && data_key %in% data_keys) {
  message("[samplesforTrackhub] ", "WARNING Possible duplicate analysis files found in ", curdir)
  msg <- paste("See: SampleID:", data$SampleID[i], "Mapped_Genome:", data$Mapped_Genome[i], "FlowCellID:", data$FlowCellID[i], sep=" ")
  message("[samplesforTrackhub] ", msg)
}

# Remember this row's key at the same index as its row in "data".
data_keys[i] <- data_key
}
}

Expand Down