diff --git a/rail_native b/rail_native index cb70d8f..19fc7ba 100755 Binary files a/rail_native and b/rail_native differ diff --git a/tools/compile.rail b/tools/compile.rail index 3a4180d..cb1689d 100644 --- a/tools/compile.rail +++ b/tools/compile.rail @@ -7,7 +7,7 @@ digits = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] alpha_lower = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"] alpha_upper = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"] -has item lst = if length lst == 0 then false else if head lst == item then true else has item (tail lst) +has item lst = if lst == [] then false else if head lst == item then true else has item (tail lst) is_digit c = has c digits hex_letters = ["a", "b", "c", "d", "e", "f", "A", "B", "C", "D", "E", "F"] is_hex_digit c = is_digit c || has c hex_letters @@ -25,8 +25,16 @@ hex_digit_val c = else if c == "f" || c == "F" then 15 else 0 +-- PERF: every lexer helper below uses `== []` for its empty-check, NOT +-- `length cs == 0`. _rail_length walks the whole list, so `length cs == 0` +-- is O(remaining) PER STEP. The main loop `tc` recurses once per token over +-- the remaining char list, so `length cs == 0` there is O(chars x tokens) +-- ~ 2.7e10 on the 551K-char compile.rail -> ~5 min just to tokenize. `rev` +-- (over ~100K tokens) and lx_str/lx_col (over long string/word spans) compound +-- it. `== []` is O(1) and byte-identical (same tokens) -> self-host fixed +-- point preserved. Self-compile tokenize: ~297s -> sub-second. htint_acc cs acc = - if length cs == 0 then acc + if cs == [] then acc else htint_acc (tail cs) (acc * 16 + hex_digit_val (head cs)) htint s acc = @@ -35,17 +43,17 @@ kw_list = ["let", "in", "if", "then", "else", "match", "type", "import", "module is_kw w = has w kw_list lx_col pred cs = - if length cs == 0 then ("", [], 0) + if cs == [] then ("", [], 0) else if pred (head cs) then let (r, rest, n) = lx_col pred (tail cs) (append (head cs) r, rest, n + 1) else ("", cs, 0) lx_str cs acc cnt = - if length cs == 0 then (acc, [], cnt) + if cs == [] then (acc, [], cnt) else if head cs == "\"" then (acc, tail cs, cnt + 1) else if head cs == "\\" then - if length (tail cs) > 0 then + if tail cs != [] then let nx = head (tail cs) let e = if nx == "n" then "\n" else if nx == "t" then "\t" else if nx == "\\" then "\\" else if nx == "\"" then "\"" else if nx == "{" then "{" else if nx == "}" then "}" else append "\\" nx lx_str (tail (tail cs)) (append acc e) (cnt + 2) @@ -53,19 +61,19 @@ lx_str cs acc cnt = else lx_str (tail cs) (append acc (head cs)) (cnt + 1) lx_skip cs = - if length cs == 0 then ([], 0) + if cs == [] then ([], 0) else if head cs == "\n" then (cs, 0) else let (rest, n) = lx_skip (tail cs) (rest, n + 1) -lx_pk cs = if length cs == 0 then "" else head cs +lx_pk cs = if cs == [] then "" else head cs -rev_acc lst acc = if length lst == 0 then acc else rev_acc (tail lst) (cons (head lst) acc) +rev_acc lst acc = if lst == [] then acc else rev_acc (tail lst) (cons (head lst) acc) rev lst = rev_acc lst [] tc cs acc pacc ln col = - if length cs == 0 then (rev (cons ("eof", "") acc), rev (cons (ln, col) pacc)) + if cs == [] then (rev (cons ("eof", "") acc), rev (cons (ln, col) pacc)) else let c = head cs let r = tail cs @@ -180,7 +188,7 @@ tc3 c r acc pacc ln col = -- `line_col` tracks the column of the first non-nl token of the current line. -- Operates on parallel (toks, positions) lists so positions stay aligned. strip_nl_pp_loop ts ps depth line_col tacc pacc = - if length ts == 0 then (rev tacc, rev pacc) + if ts == [] then (rev tacc, rev pacc) else let t = head ts let p = head ps