Skip to content

Commit ba27de2

Browse files
committed
refactor(psl-parser): remove offset tracking from tokenizer
Offsets on tokens are incompatible with the incremental parsing goal. The Tokenizer class now tracks position internally without exposing offsets on the Token interface.
1 parent 53ee1d5 commit ba27de2

2 files changed

Lines changed: 15 additions & 37 deletions

File tree

packages/1-framework/2-authoring/psl-parser/src/tokenizer.ts

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ export type TokenKind =
2424
export interface Token {
2525
readonly kind: TokenKind;
2626
readonly text: string;
27-
readonly offset: number;
2827
}
2928

3029
export class Tokenizer {
@@ -40,7 +39,7 @@ export class Tokenizer {
4039

4140
next(): Token {
4241
const token = this.#buffer.shift() ?? scan(this.#source, this.#pos);
43-
this.#pos = token.offset + token.text.length;
42+
this.#pos += token.text.length;
4443
return token;
4544
}
4645

@@ -50,7 +49,7 @@ export class Tokenizer {
5049
if (last?.kind === 'Eof') {
5150
break;
5251
}
53-
const scanPos = last !== undefined ? last.offset + last.text.length : this.#pos;
52+
const scanPos = this.#buffer.reduce((pos, t) => pos + t.text.length, this.#pos);
5453
this.#buffer.push(scan(this.#source, scanPos));
5554
}
5655
return (
@@ -61,7 +60,7 @@ export class Tokenizer {
6160

6261
function scan(source: string, pos: number): Token {
6362
if (pos >= source.length) {
64-
return { kind: 'Eof', text: '', offset: source.length };
63+
return { kind: 'Eof', text: '' };
6564
}
6665

6766
return (
@@ -75,7 +74,6 @@ function scan(source: string, pos: number): Token {
7574
scanPunctuation(source, pos) ?? {
7675
kind: 'Invalid' as const,
7776
text: readChar(source, pos),
78-
offset: pos,
7977
}
8078
);
8179
}
@@ -84,9 +82,9 @@ function scanNewline(source: string, pos: number): Token | undefined {
8482
const ch = source.charAt(pos);
8583
if (ch !== '\r' && ch !== '\n') return undefined;
8684
if (ch === '\r' && source.charAt(pos + 1) === '\n') {
87-
return { kind: 'Newline', text: '\r\n', offset: pos };
85+
return { kind: 'Newline', text: '\r\n' };
8886
}
89-
return { kind: 'Newline', text: ch, offset: pos };
87+
return { kind: 'Newline', text: ch };
9088
}
9189

9290
function scanWhitespace(source: string, pos: number): Token | undefined {
@@ -98,7 +96,7 @@ function scanWhitespace(source: string, pos: number): Token | undefined {
9896
if (c !== ' ' && c !== '\t') break;
9997
end++;
10098
}
101-
return { kind: 'Whitespace', text: source.slice(pos, end), offset: pos };
99+
return { kind: 'Whitespace', text: source.slice(pos, end) };
102100
}
103101

104102
function scanComment(source: string, pos: number): Token | undefined {
@@ -109,15 +107,15 @@ function scanComment(source: string, pos: number): Token | undefined {
109107
if (c === '\n' || c === '\r') break;
110108
end++;
111109
}
112-
return { kind: 'Comment', text: source.slice(pos, end), offset: pos };
110+
return { kind: 'Comment', text: source.slice(pos, end) };
113111
}
114112

115113
function scanAt(source: string, pos: number): Token | undefined {
116114
if (source.charAt(pos) !== '@') return undefined;
117115
if (source.charAt(pos + 1) === '@') {
118-
return { kind: 'DoubleAt', text: '@@', offset: pos };
116+
return { kind: 'DoubleAt', text: '@@' };
119117
}
120-
return { kind: 'At', text: '@', offset: pos };
118+
return { kind: 'At', text: '@' };
121119
}
122120

123121
function scanIdent(source: string, pos: number): Token | undefined {
@@ -132,7 +130,7 @@ function scanIdent(source: string, pos: number): Token | undefined {
132130
break;
133131
}
134132
}
135-
return { kind: 'Ident', text: source.slice(pos, end), offset: pos };
133+
return { kind: 'Ident', text: source.slice(pos, end) };
136134
}
137135

138136
function scanNumber(source: string, pos: number): Token | undefined {
@@ -153,7 +151,7 @@ function scanNumber(source: string, pos: number): Token | undefined {
153151
end++;
154152
}
155153
}
156-
return { kind: 'NumberLiteral', text: source.slice(pos, end), offset: pos };
154+
return { kind: 'NumberLiteral', text: source.slice(pos, end) };
157155
}
158156

159157
function scanString(source: string, pos: number): Token | undefined {
@@ -167,22 +165,22 @@ function scanString(source: string, pos: number): Token | undefined {
167165
}
168166
if (c === '"') {
169167
end++; // include closing quote
170-
return { kind: 'StringLiteral', text: source.slice(pos, end), offset: pos };
168+
return { kind: 'StringLiteral', text: source.slice(pos, end) };
171169
}
172170
if (c === '\n' || c === '\r') {
173171
// Unterminated: stop before newline
174-
return { kind: 'StringLiteral', text: source.slice(pos, end), offset: pos };
172+
return { kind: 'StringLiteral', text: source.slice(pos, end) };
175173
}
176174
end++;
177175
}
178176
// Unterminated at EOF
179-
return { kind: 'StringLiteral', text: source.slice(pos, end), offset: pos };
177+
return { kind: 'StringLiteral', text: source.slice(pos, end) };
180178
}
181179

182180
function scanPunctuation(source: string, pos: number): Token | undefined {
183181
const kind = PUNCTUATION[source.charAt(pos)];
184182
if (kind === undefined) return undefined;
185-
return { kind, text: source.charAt(pos), offset: pos };
183+
return { kind, text: source.charAt(pos) };
186184
}
187185

188186
function readChar(source: string, pos: number): string {

packages/1-framework/2-authoring/psl-parser/test/tokenizer.test.ts

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -304,26 +304,6 @@ describe('Tokenizer', () => {
304304
});
305305
});
306306

307-
describe('offsets', () => {
308-
it('each token offset equals sum of preceding text lengths', () => {
309-
const source = 'model User {\n id Int @id\n}\n';
310-
const tokens = collectAll(source);
311-
let expectedOffset = 0;
312-
for (const token of tokens) {
313-
expect(token.offset).toBe(expectedOffset);
314-
expectedOffset += token.text.length;
315-
}
316-
});
317-
318-
it('Eof offset equals source length', () => {
319-
const source = 'model User {}';
320-
const tokens = collectAll(source);
321-
const eof = tokens[tokens.length - 1]!;
322-
expect(eof.kind).toBe('Eof');
323-
expect(eof.offset).toBe(source.length);
324-
});
325-
});
326-
327307
describe('cursor API', () => {
328308
it('peek(0) returns the same token as a subsequent next()', () => {
329309
const t = new Tokenizer('model User');

0 commit comments

Comments (0)