# string.format implementation for Lua.VM.Stdlib.String
# (lib/lua/vm/stdlib/string.ex).
#
# Directive grammar: %[flags][width][.precision]specifier
#
# NOTE(review): this section was re-flowed from a whitespace-collapsed diff in
# which every `<<...>>` binary pattern had been stripped to `<>`. The patterns
# are restored below as byte-wise matches (`<<c, rest::binary>>`) - confirm
# against the repository before merging.
#
# NOTE(review): the `#` flag and a precision on integer conversions
# (e.g. `%.3d`) are parsed but have no effect.

# Expands the format string into `acc`, consuming one argument per directive.
# `%%` emits a literal percent sign and consumes no argument.
defp format_string("", _args, acc), do: acc

defp format_string("%%" <> rest, args, acc), do: format_string(rest, args, acc <> "%")

defp format_string("%" <> rest, args, acc) do
  {spec, rest} = parse_format_spec(rest)
  {arg, remaining_args} = pop_format_arg(args, spec)
  format_string(rest, remaining_args, acc <> apply_format_spec(spec, arg))
end

# Any other byte is copied through unchanged.
defp format_string(<<c, rest::binary>>, args, acc) do
  format_string(rest, args, acc <> <<c>>)
end

# Takes the next argument for a directive. Running out of arguments raises the
# module's ArgumentError instead of crashing with a MatchError.
defp pop_format_arg([arg | rest], _spec), do: {arg, rest}

defp pop_format_arg([], {_flags, _width, _precision, specifier}) do
  raise ArgumentError,
    function_name: "string.format",
    details: "no value for option '%#{specifier}'"
end

# Parses `[flags][width][.precision]specifier` from the text following a `%`.
# Returns `{{flags, width, precision, specifier}, rest}`; `width` and
# `precision` are `nil` when absent.
defp parse_format_spec(str) do
  {flags, str} = parse_flags(str, "")
  {width, str} = parse_width(str)
  {precision, str} = parse_precision(str)
  {specifier, str} = parse_specifier(str)
  {{flags, width, precision, specifier}, str}
end

# Collects any run of the flag characters `-`, `+`, space, `0`, `#`.
defp parse_flags(<<c, rest::binary>>, acc) when c in ~c(-+ 0#) do
  parse_flags(rest, acc <> <<c>>)
end

defp parse_flags(str, acc), do: {acc, str}

# Width is an optional run of decimal digits.
defp parse_width(<<c, _::binary>> = str) when c in ?0..?9, do: parse_number(str, 0)
defp parse_width(str), do: {nil, str}

# Precision is `.` followed by optional digits; a bare `.` means precision 0.
defp parse_precision("." <> rest) do
  case rest do
    <<c, _::binary>> when c in ?0..?9 -> parse_number(rest, 0)
    _ -> {0, rest}
  end
end

defp parse_precision(str), do: {nil, str}

# Accumulates leading decimal digits into an integer.
defp parse_number(<<c, rest::binary>>, acc) when c in ?0..?9 do
  parse_number(rest, acc * 10 + (c - ?0))
end

defp parse_number(str, acc), do: {acc, str}

# The conversion character is validated here, so an unknown option is
# reported before any argument is consumed.
defp parse_specifier(<<c, rest::binary>>) when c in ~c(diufeEgGxXocsq) do
  {<<c>>, rest}
end

defp parse_specifier(<<c, _rest::binary>>) do
  raise ArgumentError,
    function_name: "string.format",
    details: "invalid option '%#{<<c>>}'"
end

defp parse_specifier("") do
  raise ArgumentError,
    function_name: "string.format",
    details: "invalid format string"
end

# Renders one argument according to a parsed spec, then applies the sign
# flags and the width/justification flags.
defp apply_format_spec({flags, width, precision, specifier}, arg) do
  # parse_specifier/1 guarantees `specifier` is one of the options below.
  raw =
    case specifier do
      "d" -> format_spec_integer(arg)
      "i" -> format_spec_integer(arg)
      "u" -> format_spec_unsigned(arg)
      "f" -> format_spec_float(arg, precision || 6)
      "e" -> format_spec_scientific(arg, precision || 6, :lower)
      "E" -> format_spec_scientific(arg, precision || 6, :upper)
      "g" -> format_spec_general(arg, precision || 6, :lower)
      "G" -> format_spec_general(arg, precision || 6, :upper)
      "x" -> format_spec_hex(arg, :lower)
      "X" -> format_spec_hex(arg, :upper)
      "o" -> format_spec_octal(arg)
      "c" -> format_char(arg)
      "s" -> format_spec_string(arg, precision)
      "q" -> format_quoted(arg)
    end

  raw
  |> sign_prefix(flags, specifier)
  |> pad_formatted(raw, flags, width)
end

# `+` forces a sign and space reserves a sign column; both apply only to
# signed numeric conversions, and `+` wins when both flags are present.
defp sign_prefix(raw, flags, specifier) when specifier in ["d", "i", "f", "e", "E", "g", "G"] do
  cond do
    String.starts_with?(raw, "-") -> ""
    String.contains?(flags, "+") -> "+"
    String.contains?(flags, " ") -> " "
    true -> ""
  end
end

defp sign_prefix(_raw, _flags, _specifier), do: ""

# Pads to the requested width. With zero padding the zeros must go between
# the sign and the digits ("+0005"), so the sign is attached after padding.
defp pad_formatted("", raw, flags, width), do: apply_width_flags(raw, flags, width)

defp pad_formatted(sign, raw, flags, width) do
  zero_pad? = String.contains?(flags, "0") and not String.contains?(flags, "-")

  if zero_pad? and is_integer(width) do
    sign <> apply_width_flags(raw, flags, max(width - 1, 0))
  else
    apply_width_flags(sign <> raw, flags, width)
  end
end

# %d / %i: floats are truncated toward zero.
defp format_spec_integer(val) when is_integer(val), do: Integer.to_string(val)
defp format_spec_integer(val) when is_float(val), do: Integer.to_string(trunc(val))

defp format_spec_integer(_) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# %u: negative values are printed as their unsigned 64-bit equivalent.
defp format_spec_unsigned(val) when is_integer(val) do
  val |> to_unsigned64() |> Integer.to_string()
end

defp format_spec_unsigned(val) when is_float(val), do: format_spec_unsigned(trunc(val))

defp format_spec_unsigned(_) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# Non-negative values pass through; negatives wrap modulo 2^64.
defp to_unsigned64(val) when val >= 0, do: val
defp to_unsigned64(val), do: Integer.mod(val, 0x1_0000_0000_0000_0000)

# %f: without :compact, float_to_binary/2 emits exactly `precision` decimals,
# already rounded, so no manual padding is needed.
defp format_spec_float(val, precision) when is_number(val) do
  :erlang.float_to_binary(val / 1, [{:decimals, precision}])
end

defp format_spec_float(_, _) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# %e / %E
defp format_spec_scientific(val, precision, case_style) when is_number(val) do
  str = format_scientific_str(val / 1, precision)
  if case_style == :upper, do: String.upcase(str), else: str
end

defp format_spec_scientific(_, _, _) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# Renders `d.ddde+XX` with exactly `precision` mantissa decimals and an
# exponent of at least two digits. The runtime does the rounding, including
# the carry from 9.99... to 1.00...e+(n+1).
defp format_scientific_str(float_val, precision) do
  :erlang.float_to_binary(float_val, [{:scientific, precision}])
end

# %g / %G: `precision` is the number of significant digits (0 is treated as
# 1). Following C, the choice between fixed and scientific notation uses the
# exponent X of the value as rendered by %e with precision P - 1, i.e. after
# rounding: scientific when X < -4 or X >= P, fixed with P - 1 - X decimals
# otherwise. Trailing zeros are removed in both forms.
defp format_spec_general(val, precision, case_style) when is_number(val) do
  float_val = val / 1
  significant = max(1, precision)

  str =
    if float_val == 0.0 do
      "0"
    else
      scientific = format_scientific_str(float_val, significant - 1)
      exp = scientific_exponent(scientific)

      if exp < -4 or exp >= significant do
        strip_trailing_zeros_scientific(scientific)
      else
        float_val
        |> format_spec_float(significant - 1 - exp)
        |> strip_trailing_zeros()
      end
    end

  if case_style == :upper, do: String.upcase(str), else: str
end

defp format_spec_general(_, _, _) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# Extracts the signed decimal exponent from a `...e+XX` string.
defp scientific_exponent(scientific) do
  [_mantissa, exp] = String.split(scientific, "e", parts: 2)
  String.to_integer(exp)
end

# "1.500" -> "1.5", "2.000" -> "2"; strings without a "." are untouched.
defp strip_trailing_zeros(str) do
  if String.contains?(str, ".") do
    str |> String.trim_trailing("0") |> String.trim_trailing(".")
  else
    str
  end
end

# Strips trailing zeros from the mantissa only, leaving the exponent intact.
defp strip_trailing_zeros_scientific(str) do
  case String.split(str, "e", parts: 2) do
    [mantissa, exp] -> strip_trailing_zeros(mantissa) <> "e" <> exp
    _ -> str
  end
end

# %x / %X: negatives wrap to unsigned 64-bit, consistent with %u.
defp format_spec_hex(val, case_style) when is_integer(val) do
  str = val |> to_unsigned64() |> Integer.to_string(16)
  if case_style == :lower, do: String.downcase(str), else: String.upcase(str)
end

defp format_spec_hex(val, case_style) when is_float(val) do
  format_spec_hex(trunc(val), case_style)
end

defp format_spec_hex(_, _) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# %o: negatives wrap to unsigned 64-bit, consistent with %u.
defp format_spec_octal(val) when is_integer(val) do
  val |> to_unsigned64() |> Integer.to_string(8)
end

defp format_spec_octal(val) when is_float(val), do: format_spec_octal(trunc(val))

defp format_spec_octal(_) do
  raise ArgumentError, function_name: "string.format", expected: "number"
end

# %s: precision limits the output length in bytes, because Lua strings are
# byte sequences rather than grapheme sequences.
defp format_spec_string(arg, precision) do
  str = Util.to_lua_string(arg)

  case precision do
    nil -> str
    n -> binary_part(str, 0, min(n, byte_size(str)))
  end
end
# ---------------------------------------------------------------------------
# lib/lua/vm/stdlib/string.ex (Lua.VM.Stdlib.String)
# ---------------------------------------------------------------------------

# Pads an already-formatted value to `width`.
#
#   * `str`   - the formatted value
#   * `flags` - flag characters from the directive: `-` left-justifies, `0`
#     pads with zeros (ignored when `-` is also present)
#   * `width` - minimum field width, or `nil` for none
#
# Width is measured in bytes because Lua strings are byte sequences. With zero
# padding, a leading "-" stays in front of the zeros ("-00042").
defp apply_width_flags(str, flags, width) do
  missing = (width || 0) - byte_size(str)
  left_justify? = String.contains?(flags, "-")
  zero_pad? = String.contains?(flags, "0") and not left_justify?

  cond do
    missing <= 0 -> str
    left_justify? -> str <> String.duplicate(" ", missing)
    zero_pad? -> zero_pad(str, missing)
    true -> String.duplicate(" ", missing) <> str
  end
end

# Inserts `count` zeros after an optional leading minus sign.
defp zero_pad("-" <> digits, count), do: "-" <> String.duplicate("0", count) <> digits
defp zero_pad(str, count), do: String.duplicate("0", count) <> str

# ---------------------------------------------------------------------------
# test/lua/vm/string_test.exs (Lua.VM.StringTest)
# ---------------------------------------------------------------------------

# Parses, compiles and runs a Lua chunk and returns its single result.
defp run_format(code, state) do
  {:ok, ast} = Parser.parse(code)
  {:ok, proto} = Compiler.compile(ast, source: "test.lua")
  {:ok, [result], _state} = VM.execute(proto, state)
  result
end

describe "string.format with width/precision" do
  setup do
    %{state: Stdlib.install(State.new())}
  end

  # {test name, Lua chunk, expected result}. The padded expectations are
  # restored from the directive widths (e.g. "%5d" of 42 is 3 spaces + "42").
  for {title, code, expected} <- [
        {"width specifier right-justifies", ~S{return string.format("%5d", 42)}, "   42"},
        {"width specifier with left-justify flag", ~S{return string.format("%-5d", 42)},
         "42   "},
        {"width specifier with zero-pad flag", ~S{return string.format("%05d", 42)}, "00042"},
        {"precision for floats", ~S{return string.format("%.2f", 3.14159)}, "3.14"},
        {"precision 0 for floats", ~S{return string.format("%.0f", 3.14)}, "3"},
        {"string precision truncation", ~S{return string.format("%.3s", "hello")}, "hel"},
        {"%g format with fixed notation", ~S{return string.format("%g", 100000)}, "100000"},
        {"%g format with scientific notation", ~S{return string.format("%g", 1000000)},
         "1e+06"},
        {"%u format for positive integer", ~S{return string.format("%u", 42)}, "42"},
        {"zero-pad with negative number", ~S{return string.format("%06d", -42)}, "-00042"},
        {"width with string", ~S{return string.format("%10s", "hello")}, "     hello"},
        {"left-justify with string", ~S{return string.format("%-10s", "hello")},
         "hello     "},
        {"width and precision combined for float",
         ~S{return string.format("%8.2f", 3.14159)}, "    3.14"},
        {"%02x for hex with zero padding", ~S{return string.format("%02x", 10)}, "0a"}
      ] do
    test title, %{state: state} do
      assert run_format(unquote(code), state) == unquote(expected)
    end
  end

  test "%e format", %{state: state} do
    result = run_format(~S{return string.format("%e", 100000)}, state)
    assert String.starts_with?(result, "1.0")
    assert String.contains?(result, "e+")
  end

  test "%E format", %{state: state} do
    result = run_format(~S{return string.format("%E", 100000)}, state)
    assert String.starts_with?(result, "1.0")
    assert String.contains?(result, "E+")
  end
end

describe "string.format property tests" do
  setup do
    %{state: Stdlib.install(State.new())}
  end

  property "%d always produces a valid integer string", %{state: state} do
    check all(int <- integer(-10_000..10_000)) do
      assert run_format(~s|return string.format("%d", #{int})|, state) ==
               Integer.to_string(int)
    end
  end

  property "%d truncates floats to integers", %{state: state} do
    check all(
            int_part <- integer(-100..100),
            frac <- integer(1..99)
          ) do
      float_val = int_part + frac / 100
      result = run_format(~s|return string.format("%d", #{float_val})|, state)
      assert result == Integer.to_string(trunc(float_val))
    end
  end

  property "%x produces valid lowercase hex", %{state: state} do
    check all(int <- integer(0..65_535)) do
      result = run_format(~s|return string.format("%x", #{int})|, state)
      assert Regex.match?(~r/^[0-9a-f]+$/, result)
      {parsed, ""} = Integer.parse(result, 16)
      assert parsed == int
    end
  end

  property "%X produces valid uppercase hex", %{state: state} do
    check all(int <- integer(0..65_535)) do
      result = run_format(~s|return string.format("%X", #{int})|, state)
      assert Regex.match?(~r/^[0-9A-F]+$/, result)
      {parsed, ""} = Integer.parse(String.downcase(result), 16)
      assert parsed == int
    end
  end

  property "%o produces valid octal", %{state: state} do
    check all(int <- integer(0..10_000)) do
      result = run_format(~s|return string.format("%o", #{int})|, state)
      assert Regex.match?(~r/^[0-7]+$/, result)
      {parsed, ""} = Integer.parse(result, 8)
      assert parsed == int
    end
  end

  property "%s always returns a string containing the argument", %{state: state} do
    check all(str <- string(:ascii, min_length: 1, max_length: 10)) do
      escaped = escape_string(str)
      assert run_format(~s|return string.format("%s", "#{escaped}")|, state) == str
    end
  end

  property "%c produces single byte for values 0-127", %{state: state} do
    check all(byte <- integer(1..127)) do
      result = run_format(~s|return string.format("%c", #{byte})|, state)
      assert byte_size(result) == 1
      assert :binary.first(result) == byte
    end
  end

  property "%% always produces literal percent", %{state: state} do
    check all(int <- integer(0..100)) do
      assert run_format(~s|return string.format("#{int}%%")|, state) == "#{int}%"
    end
  end

  property "width specifier pads to at least width characters", %{state: state} do
    check all(
            int <- integer(-999..999),
            width <- integer(1..15)
          ) do
      result = run_format(~s|return string.format("%#{width}d", #{int})|, state)
      assert String.length(result) >= width
      assert String.trim(result) == Integer.to_string(int)
    end
  end

  property "left-justify flag makes result left-aligned", %{state: state} do
    check all(
            int <- integer(0..999),
            width <- integer(5..12)
          ) do
      result = run_format(~s|return string.format("%-#{width}d", #{int})|, state)
      assert String.length(result) >= width
      # Left-justified output carries its padding at the end, never the start.
      refute String.starts_with?(result, " ")
    end
  end

  property "zero-pad flag fills with zeros", %{state: state} do
    check all(
            int <- integer(0..999),
            width <- integer(5..10)
          ) do
      result = run_format(~s|return string.format("%0#{width}d", #{int})|, state)
      assert String.length(result) == width
      assert Regex.match?(~r/^[0-9]+$/, result)
      assert String.to_integer(result) == int
    end
  end

  property "%.nf produces exactly n decimal places", %{state: state} do
    check all(
            int_part <- integer(-50..50),
            precision <- integer(1..6)
          ) do
      float_val = int_part + 0.5
      result = run_format(~s|return string.format("%.#{precision}f", #{float_val})|, state)
      [_int, frac] = String.split(result, ".")
      assert String.length(frac) == precision
    end
  end

  property "%.ns truncates string to at most n characters", %{state: state} do
    check all(
            str <- string(:ascii, min_length: 1, max_length: 20),
            precision <- integer(1..10)
          ) do
      escaped = escape_string(str)
      result = run_format(~s|return string.format("%.#{precision}s", "#{escaped}")|, state)
      assert String.length(result) <= precision
      # The result must be a prefix of the original string.
      assert String.starts_with?(str, result)
    end
  end

  property "%e always contains e+/e- notation", %{state: state} do
    check all(val <- one_of([integer(1..10_000), float(min: 0.001, max: 10_000.0)])) do
      result = run_format(~s|return string.format("%e", #{val})|, state)
      assert String.contains?(result, "e+") or String.contains?(result, "e-")
    end
  end

  property "%E always contains E+/E- notation", %{state: state} do
    check all(val <- one_of([integer(1..10_000), float(min: 0.001, max: 10_000.0)])) do
      result = run_format(~s|return string.format("%E", #{val})|, state)
      assert String.contains?(result, "E+") or String.contains?(result, "E-")
    end
  end
end