Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified rail_native
Binary file not shown.
28 changes: 18 additions & 10 deletions tools/compile.rail
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ digits = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
-- Lowercase ASCII letters "a".."z", one single-character string per element.
alpha_lower = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"]
-- Uppercase ASCII letters "A".."Z", one single-character string per element.
alpha_upper = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]

-- `has item lst`: true when some element of `lst` equals `item`, false
-- otherwise (including for the empty list).
-- The empty check is `lst == []` rather than `length lst == 0`: `length`
-- walks the whole remaining list, which would make every recursion step
-- linear instead of constant.
has item lst =
  if lst == [] then false
  else if head lst == item then true
  else has item (tail lst)
-- True when `c` is one of the entries of `digits`.
is_digit c = has c digits
-- Letters accepted as hexadecimal digits, in both lower and upper case.
hex_letters = ["a", "b", "c", "d", "e", "f", "A", "B", "C", "D", "E", "F"]
-- True when `c` is a decimal digit or one of `hex_letters`.
is_hex_digit c = is_digit c || has c hex_letters
Expand All @@ -25,8 +25,16 @@ hex_digit_val c =
else if c == "f" || c == "F" then 15
else 0

-- PERF: every lexer helper below uses `== []` for its empty-check, NOT
-- `length cs == 0`. _rail_length walks the whole list, so `length cs == 0`
-- is O(remaining) PER STEP. The main loop `tc` recurses once per token over
-- the remaining char list, so `length cs == 0` there is O(chars x tokens)
-- ~ 2.7e10 on the 551K-char compile.rail -> ~5 min just to tokenize. `rev`
-- (over ~100K tokens) and lx_str/lx_col (over long string/word spans) compound
-- it. `== []` is O(1) and byte-identical (same tokens) -> self-host fixed
-- point preserved. Self-compile tokenize: ~297s -> sub-second.
-- `htint_acc cs acc`: folds the list `cs` left to right into a number,
-- multiplying the running value `acc` by 16 and adding `hex_digit_val` of
-- each element. Returns `acc` unchanged when `cs` is empty.
-- The empty check is `cs == []` (constant time) instead of `length cs == 0`,
-- which would re-walk the remaining list on every step.
htint_acc cs acc =
  if cs == [] then acc
  else htint_acc (tail cs) (acc * 16 + hex_digit_val (head cs))

htint s acc =
Expand All @@ -35,37 +43,37 @@ kw_list = ["let", "in", "if", "then", "else", "match", "type", "import", "module
-- True when the word `w` is one of the entries of `kw_list`.
is_kw w = has w kw_list

-- `lx_col pred cs`: collects the leading run of elements of `cs` for which
-- `pred` holds.
-- Returns a triple `(text, rest, n)`:
--   text - the accepted elements joined with `append`, in input order
--   rest - the input from the first rejected element onwards ([] if none)
--   n    - how many elements were accepted
-- An empty input, or a first element that fails `pred`, yields ("", _, 0).
-- The empty check is `cs == []` rather than `length cs == 0` so that each
-- step does not re-walk the remaining input.
lx_col pred cs =
  if cs == [] then ("", [], 0)
  else if pred (head cs) then
    let (r, rest, n) = lx_col pred (tail cs)
    (append (head cs) r, rest, n + 1)
  else ("", cs, 0)

-- `lx_str cs acc cnt`: scans the body of a string literal from `cs`,
-- accumulating decoded text in `acc` and the number of consumed elements
-- in `cnt`.
-- Returns `(text, rest, count)`:
--   * closing `"` found: text so far, the input after the quote, and the
--     count including the quote.
--   * input exhausted first: text so far, [], and the count so far.
-- Escapes: a backslash followed by n, t, \, ", { or } is decoded to the
-- corresponding character; any other escaped character is kept together
-- with its backslash. A backslash that is the very last element is kept
-- literally and ends the scan.
-- Empty checks use `== []` / `!= []` instead of `length`, which would walk
-- the remaining input on every step.
lx_str cs acc cnt =
  if cs == [] then (acc, [], cnt)
  else if head cs == "\"" then (acc, tail cs, cnt + 1)
  else if head cs == "\\" then
    if tail cs != [] then
      let nx = head (tail cs)
      let e = if nx == "n" then "\n" else if nx == "t" then "\t" else if nx == "\\" then "\\" else if nx == "\"" then "\"" else if nx == "{" then "{" else if nx == "}" then "}" else append "\\" nx
      lx_str (tail (tail cs)) (append acc e) (cnt + 2)
    else (append acc "\\", [], cnt + 1)
  else lx_str (tail cs) (append acc (head cs)) (cnt + 1)

-- `lx_skip cs`: skips elements of `cs` up to, but not including, the next
-- "\n".
-- Returns `(rest, n)` where `rest` starts at the newline (or is [] when the
-- input ends first) and `n` is the number of elements skipped.
-- The empty check is `cs == []` rather than `length cs == 0` so that each
-- step does not re-walk the remaining input.
lx_skip cs =
  if cs == [] then ([], 0)
  else if head cs == "\n" then (cs, 0)
  else
    let (rest, n) = lx_skip (tail cs)
    (rest, n + 1)

-- `lx_pk cs`: peeks at the first element of `cs` without consuming it;
-- returns "" when `cs` is empty. Uses `cs == []` for the empty check so the
-- peek does not walk the list the way `length cs == 0` would.
lx_pk cs = if cs == [] then "" else head cs

-- `rev_acc lst acc`: pushes the elements of `lst`, front to back, onto
-- `acc` with `cons`, so the result is `lst` reversed followed by the
-- original `acc`. Returns `acc` when `lst` is empty.
-- The empty check is `lst == []` rather than `length lst == 0`, which would
-- make reversing quadratic in the list length.
rev_acc lst acc =
  if lst == [] then acc
  else rev_acc (tail lst) (cons (head lst) acc)
-- Reverses `lst` by running `rev_acc` with an empty accumulator.
rev lst = rev_acc lst []

tc cs acc pacc ln col =
if length cs == 0 then (rev (cons ("eof", "") acc), rev (cons (ln, col) pacc))
if cs == [] then (rev (cons ("eof", "") acc), rev (cons (ln, col) pacc))
else
let c = head cs
let r = tail cs
Expand Down Expand Up @@ -180,7 +188,7 @@ tc3 c r acc pacc ln col =
-- `line_col` tracks the column of the first non-nl token of the current line.
-- Operates on parallel (toks, positions) lists so positions stay aligned.
strip_nl_pp_loop ts ps depth line_col tacc pacc =
if length ts == 0 then (rev tacc, rev pacc)
if ts == [] then (rev tacc, rev pacc)
else
let t = head ts
let p = head ps
Expand Down
Loading