diff --git a/source/ReadAlign_maxMappableLength2strands.cpp b/source/ReadAlign_maxMappableLength2strands.cpp index 9db6618c..c0ddee78 100644 --- a/source/ReadAlign_maxMappableLength2strands.cpp +++ b/source/ReadAlign_maxMappableLength2strands.cpp @@ -58,12 +58,7 @@ uint ReadAlign::maxMappableLength2strands(uint pieceStartIn, uint pieceLengthIn, maxL=0; Nrep = maxMappableLength(Read1, pieceStart, pieceLength, G, SA, iSA1 & P->SAiMarkNmask, iSA2, dirR, maxL, indStartEnd, P); #else - if (Lind < P->genomeSAindexNbases && (iSA1 & P->SAiMarkNmaskC)==0 ) {//no need for SA search - indStartEnd[0]=iSA1; - indStartEnd[1]=iSA2; - Nrep=indStartEnd[1]-indStartEnd[0]+1; - maxL=Lind; - } else if (iSA1==iSA2) {//unique align already, just find maxL + if (iSA1==iSA2) {//unique align already, just find maxL if ((iSA1 & P->SAiMarkNmaskC)!=0) { ostringstream errOut; errOut << "BUG: in ReadAlign::maxMappableLength2strands"; @@ -75,7 +70,14 @@ uint ReadAlign::maxMappableLength2strands(uint pieceStartIn, uint pieceLengthIn, maxL=compareSeqToGenome(Read1,pieceStart, pieceLength, Lind, G, SA, iSA1, dirR, comparRes, P); } else {//SA search, pieceLength>maxL if ( (iSA1 & P->SAiMarkNmaskC)==0 ) {//no N in the prefix - maxL=Lind; + // iSA2 can now point to a suffix with multiple "N" bases, eg. this sort order + // ACTGAAAA <- iSA1 + // ACTGNNNN <- iSA2 + // ACTTACTG <- next prefix + // As a result, we can only assume the match length is as good up to + // the length that iSA2 hits, and need to compare everything past that length in the binary search + bool cres; + maxL=compareSeqToGenome(Read1,pieceStart, pieceLength, 0, G, SA, iSA2, dirR, cres, P); } else { maxL=0; };