|
28 | 28 |
|
29 | 29 | import java.io.File; |
30 | 30 | import java.io.IOException; |
31 | | -import java.nio.ByteBuffer; |
32 | 31 | import java.text.DateFormat; |
33 | 32 | import java.text.SimpleDateFormat; |
34 | 33 | import java.util.ArrayList; |
@@ -450,7 +449,7 @@ private void checkHeadersSortOrder(List<SAMFileHeader> headers, boolean isNormal |
450 | 449 | } |
451 | 450 |
|
452 | 451 | List<SAMSequenceDictionary> getSequenceDictionaries(List<SAMFileHeader> headers) { |
453 | | - final List<SAMSequenceDictionary> seqDictionaries = new ArrayList<SAMSequenceDictionary>(); |
| 452 | + final List<SAMSequenceDictionary> seqDictionaries = new ArrayList<>(); |
454 | 453 | for (final SAMFileHeader header : headers) { |
455 | 454 | seqDictionaries.add(header.getSequenceDictionary()); |
456 | 455 | } |
@@ -485,7 +484,7 @@ private void checkSequenceDictionaries(List<SAMFileHeader> normalHeaders, List<S |
485 | 484 |
|
486 | 485 |
|
487 | 486 | // pick the first normal sequence to check against fasta |
488 | | - final List<SAMSequenceRecord> normalSequences = normalSeqDictionaries.get(0).getSequences(); |
| 487 | + final List<SAMSequenceRecord> normalSequences = normalSeqDictionaries.getFirst().getSequences(); |
489 | 488 |
|
490 | 489 | // now check against the supplied reference file |
491 | 490 | final FastaSequenceFile ref = new FastaSequenceFile(new File(referenceFile), true); |
@@ -633,7 +632,7 @@ void walkBams() throws Exception { |
633 | 632 | /** |
634 | 633 | * Sets up 2 Producer threads, 2 Consumer threads and a Cleaner thread, along with the concurrent collections, queues, and barriers used by them all |
635 | 634 | * |
636 | | - * @param ignoreDuplicates indicates whether duplicate records should be discarded out right. Not useful for torrent mode |
| 635 | + * @param includeDups indicates whether duplicate records should be discarded outright. Not useful for torrent mode |
637 | 636 | * @throws Exception |
638 | 637 | */ |
639 | 638 | void walkBams(boolean includeDups) throws Exception { |
@@ -736,7 +735,10 @@ public class Producer implements Runnable { |
736 | 735 | private XXHash64 xxhash64; |
737 | 736 | private final static int seed = 0x9747b28c; // used to initialize the hash value, use whatever value you want, but always the same |
738 | 737 | private final static int ONE_MILLION = 1_000_000; |
739 | | - |
| 738 | + private int failedFilterStartPosition = 0; |
| 739 | + private int failedFilterCountAtStartPosition = 0; |
| 740 | + private static final int MAX_FAILED_RECORDS_PER_POSITION = 1000; |
| 741 | + |
740 | 742 | public Producer(final String[] bamFiles, final CountDownLatch latch, final boolean isNormal, |
741 | 743 | final Queue<SAMRecordFilterWrapper> samQueue, final Thread mainThread, final String query, |
742 | 744 | final CyclicBarrier barrier, boolean includeDups, Accumulator [] accum) throws Exception { |
@@ -874,7 +876,24 @@ private void processRecord(SAMRecord record) throws Exception { |
874 | 876 | /* |
875 | 877 | * we now want to keep track of reads that don't pass the filter for test as well as control |
876 | 878 | */ |
877 | | - addRecordToQueue(record, passesFilter); |
| 879 | + if ( ! passesFilter) { |
| 880 | + /* |
| 881 | + There are instances in the genome where there are a large number of poor quality reads mapping to the same location. |
| 882 | + This is causing memory issues when trying to process these reads, so we will ignore these reads once we have seen 1000 reads at the same start position that fail the filter. |
| 883 | + */ |
| 884 | + int start = record.getAlignmentStart(); |
| 885 | + if (start == failedFilterStartPosition) { |
| 886 | + failedFilterCountAtStartPosition++; |
| 887 | + } else { |
| 888 | + failedFilterCountAtStartPosition = 1; |
| 889 | + failedFilterStartPosition = start; |
| 890 | + } |
| 891 | + if (failedFilterCountAtStartPosition <= MAX_FAILED_RECORDS_PER_POSITION) { |
| 892 | + addRecordToQueue(record, false); |
| 893 | + } |
| 894 | + } else { |
| 895 | + addRecordToQueue(record, true); |
| 896 | + } |
878 | 897 | } else { |
879 | 898 | // didn't have any filtering defined - add all |
880 | 899 | addRecordToQueue(record, true); |
@@ -986,7 +1005,7 @@ public void processSAMRecord(final SAMRecordFilterWrapper record) { |
986 | 1005 | * @param length |
987 | 1006 | * @param referenceOffset |
988 | 1007 | * @param passesFilter |
989 | | - * @param readStartPosition start position of the read - depends on strand as to whether this is the alignemtnEnd or alignmentStart |
| 1008 | + * @param readStartPosition start position of the read - depends on strand as to whether this is the alignmentEnd or alignmentStart |
990 | 1009 | */ |
991 | 1010 | public void updateMapWithAccums(int startPosition, final byte[] bases, final byte[] qualities, |
992 | 1011 | boolean forwardStrand, int offset, int length, int referenceOffset, final boolean passesFilter, final int readEndPosition, long readNameHash) { |
@@ -1275,10 +1294,10 @@ private void interrogateAccumulations(Accumulator control, Accumulator test) { |
1275 | 1294 | */ |
1276 | 1295 | if (null != control && null != test) { |
1277 | 1296 | if (control.getPosition() != test.getPosition()) { |
1278 | | - throw new IllegalArgumentException("Control and test accumulator positions do not match!!! control: " + control.toString() + ", and test: " + test.toString()); |
| 1297 | + throw new IllegalArgumentException("Control and test accumulator positions do not match!!! control: " + control + ", and test: " + test); |
1279 | 1298 | } |
1280 | 1299 | } |
1281 | | - final int position = control != null ? control.getPosition() : test.getPosition(); |
| 1300 | + final int position = control != null ? control.getPosition() : test != null ? test.getPosition() : Integer.MAX_VALUE; |
1282 | 1301 |
|
1283 | 1302 | // if we are over the length of this particular sequence - return |
1284 | 1303 | if (position - 1 >= referenceBasesLength) return; |
@@ -1377,7 +1396,7 @@ private void interrogateAccumulations(Accumulator control, Accumulator test) { |
1377 | 1396 | /* |
1378 | 1397 | * populate adjacentAccumulators so that compound snp decision can be made |
1379 | 1398 | */ |
1380 | | - adjacentAccumulators.put(v, new Pair<Accumulator, Accumulator>(control, test)); |
| 1399 | + adjacentAccumulators.put(v, new Pair<>(control, test)); |
1381 | 1400 | } |
1382 | 1401 | } |
1383 | 1402 | } |
|
0 commit comments