@@ -169,6 +169,7 @@ public class SimpleTextV1LexiconPolicyParser extends XMLLexiconPolicyParser
169169 tokenMap = new HashMap <String , TokenType >();
170170
171171 // build the token list
172+ // Note: do not add a raw quote token here. Quoted text is handled specially in parseToTokens.
172173 tokenMap .put ("(" , TokenType .START_LEVEL );
173174 tokenMap .put (")" , TokenType .END_LEVEL );
174175
@@ -180,7 +181,7 @@ public class SimpleTextV1LexiconPolicyParser extends XMLLexiconPolicyParser
180181 // add the X509Fields
181182 tokenMap .put (X509FieldType .SIGNATURE .getFieldToken (), TokenType .CERTIFICATE_REFERENCE_EXPRESSION );
182183 tokenMap .put (X509FieldType .SIGNATURE_ALGORITHM .getFieldToken (), TokenType .CERTIFICATE_REFERENCE_EXPRESSION );
183-
184+
184185 // add the TBS fields
185186 final TBSFieldName [] tbsFieldNames = (TBSFieldName [].class .cast (TBSFieldName .class .getEnumConstants ()));
186187 for (TBSFieldName tbsFieldName : tbsFieldNames )
@@ -489,17 +490,75 @@ protected Vector<TokenTypeAssociation> parseToTokens(InputStream stream) throws
489490 boolean holdMode = false ;
490491 TokenType holdType = null ;
491492
492- for (int i ; (i = isr .read ()) > 0 ; )
493+ for (int ci ; (ci = isr .read ()) != - 1 ; )
493494 {
494- writer .write (i );
495+ final char ch = (char )ci ;
496+
497+ // If we encounter an opening quote, flush any buffered text and read until the closing quote.
498+ if (ch == '"' )
499+ {
500+ final String pre = writer .toString ().trim ();
501+ if (!pre .isEmpty ())
502+ {
503+ final TokenType exactPreToken = tokenMap .get (pre );
504+ final TokenType addPreType = (exactPreToken != null ) ? exactPreToken : TokenType .LITERAL_EXPRESSION ;
505+ tokens .add (new TokenTypeAssociation (pre , addPreType ));
506+ }
507+ writer = new StringWriter ();
508+
509+ // read until closing quote or EOF; everything inside is a literal expression
510+ StringBuilder quoted = new StringBuilder ();
511+ int cj ;
512+ while ((cj = isr .read ()) != -1 )
513+ {
514+ final char qc = (char )cj ;
515+ // handle escape sequences
516+ if (qc == '\\' )
517+ {
518+ int nd = isr .read ();
519+ if (nd == -1 )
520+ {
521+ // dangling backslash at EOF; treat the backslash literally
522+ quoted .append ('\\' );
523+ break ;
524+ }
525+ final char esc = (char )nd ;
526+ switch (esc )
527+ {
528+ case '\\' :
529+ quoted .append ('\\' );
530+ break ;
531+ case '"' :
532+ quoted .append ('"' );
533+ break ;
534+ default :
535+ // unknown escape - preserve the backslash and the character (e.g., file paths like \t)
536+ quoted .append ('\\' );
537+ quoted .append (esc );
538+ break ;
539+ }
540+ continue ;
541+ }
542+ if (qc == '"' )
543+ break ; // found closing quote
544+ quoted .append (qc );
545+ }
546+
547+ tokens .add (new TokenTypeAssociation (quoted .toString (), TokenType .LITERAL_EXPRESSION ));
548+ // continue to next character
549+ continue ;
550+ }
551+
552+ // Normal processing: append char and continue token detection
553+ writer .write (ch );
495554 final String checkForTokenString = writer .toString ();
496555
497556 // check to see if we have an exact match to a token
498557 TokenType exactMatchToken = tokenMap .get (checkForTokenString );
499558
500559 if (exactMatchToken != null )
501560 {
502- // if the token is an operator, we need to keep looking forward to the next
561+ // if the token is an operator or a certificate reference, we need to keep looking forward to the next
503562 // character because some operators are made up of the exact same characters.. we
504563 // may have a partial string of an operator with more characters
505564 if (exactMatchToken == TokenType .OPERATOR_EXPRESSION || exactMatchToken == TokenType .CERTIFICATE_REFERENCE_EXPRESSION )
@@ -535,8 +594,8 @@ protected Vector<TokenTypeAssociation> parseToTokens(InputStream stream) throws
535594
536595 exactMatchToken = tokenMap .get (nextToken );
537596 if (exactMatchToken != null )
538- {
539-
597+ {
598+
540599 tokens .add (new TokenTypeAssociation (nextToken , exactMatchToken ));
541600 }
542601 else
@@ -549,6 +608,7 @@ protected Vector<TokenTypeAssociation> parseToTokens(InputStream stream) throws
549608 {
550609 // we didn't hit an exact match, but the new character we hit may be a reserved token
551610 // check to see if the checkForTokenString now contains a reserved token
611+ boolean handled = false ;
552612 for (String key : tokenMap .keySet ())
553613 {
554614 int idx = checkForTokenString .indexOf (key );
@@ -581,13 +641,19 @@ protected Vector<TokenTypeAssociation> parseToTokens(InputStream stream) throws
582642 // the token is not an operator, so add it to the token vector
582642 tokens .add (new TokenTypeAssociation (secondToken , exactMatchToken ));
583643 }
644+ handled = true ;
584645 break ;
585646 }
586647 }
648+
649+ if (!handled )
650+ {
651+ // nothing special found; continue accumulating
652+ }
587653 }
588654 }
589- }
590-
655+ }
656+
591657 // now that we have completed traversing the expression lexicon, if there is anything left over in the writer then
592658 // add it as a token
593659 final String remainingString = writer .toString ().trim ();
@@ -638,7 +704,13 @@ protected static enum TokenType
638704 /**
639705 * A certificate reference expression
640706 */
641- CERTIFICATE_REFERENCE_EXPRESSION ;
707+ CERTIFICATE_REFERENCE_EXPRESSION ,
708+
709+ /**
710+ * Marks the beginning or end of LITERAL_EXPRESSION where all contents regardless of character will
711+ * be considered part of the LIBERAL_EXPRESSION.
712+ */
713+ LITERAL_QUOTE ;
642714 }
643715
644716 protected Integer resetLevel ()
0 commit comments