From bc200834c21b7b332836e9c9d0ecc4977354bd22 Mon Sep 17 00:00:00 2001 From: Oliver Schneider Date: Sat, 14 Nov 2015 22:16:01 +0000 Subject: [PATCH 1/4] Adding support for Visual Studio 2015 projects and solutions. Does not add the VisualStudioVersion and MinimumVisualStudioVersion in solution files, but VS2015 opens those fine regardless and picks them up first even when VS2010, 2012 and 2013 are installed as well. Added corresponding tests and verified that all is as expected. --- .hgignore | 3 + src/_manifest.lua | 1 + src/actions/vstudio/vs2010_vcxproj.lua | 2 +- src/actions/vstudio/vs2015.lua | 57 +++++++++++++++++++ tests/actions/vstudio/sln2005/header.lua | 17 ++++++ .../vstudio/vc2010/test_config_props.lua | 13 +++++ 6 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 src/actions/vstudio/vs2015.lua diff --git a/.hgignore b/.hgignore index a5dcc88c..0b11b56d 100644 --- a/.hgignore +++ b/.hgignore @@ -35,3 +35,6 @@ Scratchpad.txt Unix Worksheet.worksheet project.bbprojectdata Premake4.tmproj + +.git +.git/** diff --git a/src/_manifest.lua b/src/_manifest.lua index ee07f81f..19f497a7 100644 --- a/src/_manifest.lua +++ b/src/_manifest.lua @@ -65,6 +65,7 @@ "actions/vstudio/vs2010_vcxproj_filters.lua", "actions/vstudio/vs2012.lua", "actions/vstudio/vs2013.lua", + "actions/vstudio/vs2015.lua", -- Xcode action "actions/xcode/_xcode.lua", diff --git a/src/actions/vstudio/vs2010_vcxproj.lua b/src/actions/vstudio/vs2010_vcxproj.lua index 4adef794..dcff2200 100644 --- a/src/actions/vstudio/vs2010_vcxproj.lua +++ b/src/actions/vstudio/vs2010_vcxproj.lua @@ -79,7 +79,7 @@ _p(2,'%s', iif(optimisation(cfg) == "Disabled","true","false")) _p(2,'%s',iif(cfg.flags.Unicode,"Unicode","MultiByte")) - local toolsets = { vs2012 = "v110", vs2013 = "v120" } + local toolsets = { vs2012 = "v110", vs2013 = "v120", vs2015 = "v140" } local toolset = toolsets[_ACTION] if toolset then _p(2,'%s', toolset) diff --git a/src/actions/vstudio/vs2015.lua 
b/src/actions/vstudio/vs2015.lua new file mode 100644 index 00000000..8fa0ca94 --- /dev/null +++ b/src/actions/vstudio/vs2015.lua @@ -0,0 +1,57 @@ +-- +-- vs2015.lua +-- Baseline support for Visual Studio 2015. +-- Copyright (c) 2013 Jason Perkins and the Premake project +-- + + premake.vstudio.vc2015 = {} + local vc2015 = premake.vstudio.vc2015 + local vstudio = premake.vstudio + + +--- +-- Register a command-line action for Visual Studio 2015. +--- + + newaction + { + trigger = "vs2015", + shortname = "Visual Studio 2015", + description = "Generate Microsoft Visual Studio 2015 project files", + os = "windows", + + valid_kinds = { "ConsoleApp", "WindowedApp", "StaticLib", "SharedLib" }, + + valid_languages = { "C", "C++", "C#"}, + + valid_tools = { + cc = { "msc" }, + dotnet = { "msnet" }, + }, + + onsolution = function(sln) + premake.generate(sln, "%%.sln", vstudio.sln2005.generate) + end, + + onproject = function(prj) + if premake.isdotnetproject(prj) then + premake.generate(prj, "%%.csproj", vstudio.cs2005.generate) + premake.generate(prj, "%%.csproj.user", vstudio.cs2005.generate_user) + else + premake.generate(prj, "%%.vcxproj", premake.vs2010_vcxproj) + premake.generate(prj, "%%.vcxproj.user", premake.vs2010_vcxproj_user) + premake.generate(prj, "%%.vcxproj.filters", vstudio.vc2010.generate_filters) + end + end, + + + oncleansolution = premake.vstudio.cleansolution, + oncleanproject = premake.vstudio.cleanproject, + oncleantarget = premake.vstudio.cleantarget, + + vstudio = { + solutionVersion = "12", + targetFramework = "4.5.2", + toolsVersion = "14.0", + } + } diff --git a/tests/actions/vstudio/sln2005/header.lua b/tests/actions/vstudio/sln2005/header.lua index 71e32f5d..a9596770 100755 --- a/tests/actions/vstudio/sln2005/header.lua +++ b/tests/actions/vstudio/sln2005/header.lua @@ -77,3 +77,20 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 ]] end + + function suite.On2015() + _ACTION = "vs2015" + prepare() + 
test.capture [[ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2015 + ]] +--[[ +VS2015 seems to add: + +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 + +which don't seem to be mandatory, though. +]] + end diff --git a/tests/actions/vstudio/vc2010/test_config_props.lua b/tests/actions/vstudio/vc2010/test_config_props.lua index 50d78aec..2970ad48 100644 --- a/tests/actions/vstudio/vc2010/test_config_props.lua +++ b/tests/actions/vstudio/vc2010/test_config_props.lua @@ -76,3 +76,16 @@ ]] end + + function suite.structureIsCorrect_onDefaultValues_on2015() + _ACTION = "vs2015" + prepare() + test.capture [[ + + Application + true + MultiByte + v140 + + ]] + end From d121a9c882b0c2681038060c2168349332af44ae Mon Sep 17 00:00:00 2001 From: Oliver Schneider Date: Thu, 10 Feb 2022 00:14:20 +0100 Subject: [PATCH 2/4] Backporting VS2017 through VS2022 support There are also minor fixes, such as for CompileAs and other conditions. It actually also required changing two test cases, because implicitly the default are C++ projects, so adding .c files should add CompileAs, but didn't in the past. 
--- src/_manifest.lua | 3 + src/actions/vstudio/vs2005_csproj.lua | 2 +- src/actions/vstudio/vs2005_solution.lua | 4 + src/actions/vstudio/vs200x_vcproj.lua | 70 +++---- src/actions/vstudio/vs2010_vcxproj.lua | 34 ++-- src/actions/vstudio/vs2015.lua | 1 + src/actions/vstudio/vs2017.lua | 58 ++++++ src/actions/vstudio/vs2019.lua | 58 ++++++ src/actions/vstudio/vs2022.lua | 58 ++++++ src/host/os_getversion.c | 172 ++++++++++++------ src/host/os_is64bit.c | 18 +- src/host/os_match.c | 29 ++- src/host/os_uuid.c | 4 +- src/host/path_translate.c | 8 +- src/host/premake.c | 33 +++- tests/actions/vstudio/sln2005/header.lua | 63 ++++++- .../vstudio/vc2010/test_config_props.lua | 39 ++++ tests/actions/vstudio/vc2010/test_files.lua | 2 + tests/testfx.lua | 2 +- 19 files changed, 548 insertions(+), 110 deletions(-) create mode 100644 src/actions/vstudio/vs2017.lua create mode 100644 src/actions/vstudio/vs2019.lua create mode 100644 src/actions/vstudio/vs2022.lua diff --git a/src/_manifest.lua b/src/_manifest.lua index 19f497a7..30daba35 100644 --- a/src/_manifest.lua +++ b/src/_manifest.lua @@ -66,6 +66,9 @@ "actions/vstudio/vs2012.lua", "actions/vstudio/vs2013.lua", "actions/vstudio/vs2015.lua", + "actions/vstudio/vs2017.lua", + "actions/vstudio/vs2019.lua", + "actions/vstudio/vs2022.lua", -- Xcode action "actions/xcode/_xcode.lua", diff --git a/src/actions/vstudio/vs2005_csproj.lua b/src/actions/vstudio/vs2005_csproj.lua index 6b6238c8..0b45c82d 100644 --- a/src/actions/vstudio/vs2005_csproj.lua +++ b/src/actions/vstudio/vs2005_csproj.lua @@ -38,7 +38,7 @@ local basename = fname:sub(1, -9) local testname = basename .. ".xaml" if premake.findfile(prj, testname) then - return "SubTypeCode", path.getname(testname) + return "SubTypeCode", testname end else -- is there a *.Designer.cs file? 
diff --git a/src/actions/vstudio/vs2005_solution.lua b/src/actions/vstudio/vs2005_solution.lua index b0942db7..bbfa6853 100644 --- a/src/actions/vstudio/vs2005_solution.lua +++ b/src/actions/vstudio/vs2005_solution.lua @@ -40,7 +40,11 @@ function sln2005.header(sln) local action = premake.action.current() _p('Microsoft Visual Studio Solution File, Format Version %d.00', action.vstudio.solutionVersion) + if action.vstudio.shortSlnVersion ~= nil then + _p('# Visual Studio %s', action.vstudio.shortSlnVersion) + else _p('# Visual Studio %s', _ACTION:sub(3)) + end end diff --git a/src/actions/vstudio/vs200x_vcproj.lua b/src/actions/vstudio/vs200x_vcproj.lua index 3ef4e8bc..a36d8ae8 100644 --- a/src/actions/vstudio/vs200x_vcproj.lua +++ b/src/actions/vstudio/vs200x_vcproj.lua @@ -102,36 +102,9 @@ _p(3,'>') end + vc200x.individualSourceFileOptions = nil --- --- Write out the element. --- - - function vc200x.Files(prj) - local tr = premake.project.buildsourcetree(prj) - - tree.traverse(tr, { - -- folders are handled at the internal nodes - onbranchenter = function(node, depth) - _p(depth, '') - end, - - onbranchexit = function(node, depth) - _p(depth, '') - end, - - -- source files are handled at the leaves - onleaf = function(node, depth) - local fname = node.cfg.name - - _p(depth, '') - depth = depth + 1 - + function vc200x.individualSourceFile(prj, depth, fname, node) -- handle file configuration stuff. This needs to be cleaned up and simplified. 
-- configurations are cached, so this isn't as bad as it looks for _, cfginfo in ipairs(prj.solution.vstudio_configs) do @@ -142,7 +115,7 @@ local isSourceCode = path.iscppfile(fname) local needsCompileAs = (path.iscfile(fname) ~= premake.project.iscproject(prj)) - if usePCH or (isSourceCode and needsCompileAs) then + if usePCH or (isSourceCode and needsCompileAs) or (type(vc200x.individualSourceFileOptions) == 'function') then _p(depth, '') @@ -166,6 +139,9 @@ _p(depth, '\t\tUsePrecompiledHeader="1"') end end + if (type(vc200x.individualSourceFileOptions) == 'function') then + vc200x.individualSourceFileOptions(prj, depth, fname, node) + end _p(depth, '\t/>') _p(depth, '') @@ -173,6 +149,38 @@ end end + end + +-- +-- Write out the element. +-- + + function vc200x.Files(prj) + local tr = premake.project.buildsourcetree(prj) + + tree.traverse(tr, { + -- folders are handled at the internal nodes + onbranchenter = function(node, depth) + _p(depth, '') + end, + + onbranchexit = function(node, depth) + _p(depth, '') + end, + + -- source files are handled at the leaves + onleaf = function(node, depth) + local fname = node.cfg.name + + _p(depth, '') + depth = depth + 1 + + vc200x.individualSourceFile(prj, depth, fname, node) depth = depth - 1 _p(depth, '') @@ -388,7 +396,7 @@ end if (cfg.kind == "ConsoleApp" or cfg.kind == "WindowedApp") and not cfg.flags.WinMain then - _p(4,'EntryPointSymbol="mainCRTStartup"') + _p(4,'EntryPointSymbol="%s"', iif(cfg.flags.Unicode, "wmainCRTStartup", "mainCRTStartup")) end if cfg.kind == "SharedLib" then diff --git a/src/actions/vstudio/vs2010_vcxproj.lua b/src/actions/vstudio/vs2010_vcxproj.lua index dcff2200..6af05dbc 100644 --- a/src/actions/vstudio/vs2010_vcxproj.lua +++ b/src/actions/vstudio/vs2010_vcxproj.lua @@ -79,7 +79,7 @@ _p(2,'%s', iif(optimisation(cfg) == "Disabled","true","false")) _p(2,'%s',iif(cfg.flags.Unicode,"Unicode","MultiByte")) - local toolsets = { vs2012 = "v110", vs2013 = "v120", vs2015 = "v140" } + local toolsets 
= { vs2012 = "v110", vs2013 = "v120", vs2015 = "v140", vs2017 = "v141", vs2019 = "v142", vs2022 = "v143" } local toolset = toolsets[_ACTION] if toolset then _p(2,'%s', toolset) @@ -417,7 +417,7 @@ end if vc2010.config_type(cfg) == 'Application' and not cfg.flags.WinMain and not cfg.flags.Managed then - _p(3,'mainCRTStartup') + _p(3,'%s', iif(cfg.flags.Unicode, "wmainCRTStartup", "mainCRTStartup")) end import_lib(cfg) @@ -526,6 +526,26 @@ end end + vc2010.individualSourceFileOptions = nil + + function vc2010.individualSourceFile(prj, config_mappings, file) + local configs = prj.solution.vstudio_configs + local translatedpath = path.translate(file.name, "\\") + _p(2,'', translatedpath) + for _, cfginfo in ipairs(configs) do + if config_mappings[cfginfo] and translatedpath == config_mappings[cfginfo] then + _p(3,'Create', premake.esc(cfginfo.name)) + config_mappings[cfginfo] = nil --only one source file per pch + end + end + if path.iscfile(file.name) ~= premake.project.iscproject(prj) then + _p(3,'%s', iif(path.iscfile(file.name), 'CompileAsC', 'CompileAsCpp')) + end + if (type(vc2010.individualSourceFileOptions) == 'function') then + vc2010.individualSourceFileOptions(prj, config_mappings, file) + end + _p(2,'') + end function vc2010.compilerfilesgroup(prj) local configs = prj.solution.vstudio_configs @@ -541,15 +561,7 @@ _p(1,'') for _, file in ipairs(files) do - local translatedpath = path.translate(file.name, "\\") - _p(2,'', translatedpath) - for _, cfginfo in ipairs(configs) do - if config_mappings[cfginfo] and translatedpath == config_mappings[cfginfo] then - _p(3,'Create', premake.esc(cfginfo.name)) - config_mappings[cfginfo] = nil --only one source file per pch - end - end - _p(2,'') + vc2010.individualSourceFile(prj, config_mappings, file) end _p(1,'') end diff --git a/src/actions/vstudio/vs2015.lua b/src/actions/vstudio/vs2015.lua index 8fa0ca94..9a8ba023 100644 --- a/src/actions/vstudio/vs2015.lua +++ b/src/actions/vstudio/vs2015.lua @@ -53,5 +53,6 @@ 
solutionVersion = "12", targetFramework = "4.5.2", toolsVersion = "14.0", + shortSlnVersion = "14", } } diff --git a/src/actions/vstudio/vs2017.lua b/src/actions/vstudio/vs2017.lua new file mode 100644 index 00000000..0bc0a023 --- /dev/null +++ b/src/actions/vstudio/vs2017.lua @@ -0,0 +1,58 @@ +-- +-- vs2017.lua +-- Baseline support for Visual Studio 2017. +-- Copyright (c) 2013 Jason Perkins and the Premake project +-- + + premake.vstudio.vc2017 = {} + local vc2017 = premake.vstudio.vc2017 + local vstudio = premake.vstudio + + +--- +-- Register a command-line action for Visual Studio 2017. +--- + + newaction + { + trigger = "vs2017", + shortname = "Visual Studio 2017", + description = "Generate Microsoft Visual Studio 2017 project files", + os = "windows", + + valid_kinds = { "ConsoleApp", "WindowedApp", "StaticLib", "SharedLib" }, + + valid_languages = { "C", "C++", "C#"}, + + valid_tools = { + cc = { "msc" }, + dotnet = { "msnet" }, + }, + + onsolution = function(sln) + premake.generate(sln, "%%.sln", vstudio.sln2005.generate) + end, + + onproject = function(prj) + if premake.isdotnetproject(prj) then + premake.generate(prj, "%%.csproj", vstudio.cs2005.generate) + premake.generate(prj, "%%.csproj.user", vstudio.cs2005.generate_user) + else + premake.generate(prj, "%%.vcxproj", premake.vs2010_vcxproj) + premake.generate(prj, "%%.vcxproj.user", premake.vs2010_vcxproj_user) + premake.generate(prj, "%%.vcxproj.filters", vstudio.vc2010.generate_filters) + end + end, + + + oncleansolution = premake.vstudio.cleansolution, + oncleanproject = premake.vstudio.cleanproject, + oncleantarget = premake.vstudio.cleantarget, + + vstudio = { + solutionVersion = "12", + targetFramework = "4.5.2", + toolsVersion = "15.0", + shortSlnVersion = "15", + } + } diff --git a/src/actions/vstudio/vs2019.lua b/src/actions/vstudio/vs2019.lua new file mode 100644 index 00000000..acfb9247 --- /dev/null +++ b/src/actions/vstudio/vs2019.lua @@ -0,0 +1,58 @@ +-- +-- vs2019.lua +-- Baseline 
support for Visual Studio 2019. +-- Copyright (c) 2013 Jason Perkins and the Premake project +-- + + premake.vstudio.vc2019 = {} + local vc2019 = premake.vstudio.vc2019 + local vstudio = premake.vstudio + + +--- +-- Register a command-line action for Visual Studio 2019. +--- + + newaction + { + trigger = "vs2019", + shortname = "Visual Studio 2019", + description = "Generate Microsoft Visual Studio 2019 project files", + os = "windows", + + valid_kinds = { "ConsoleApp", "WindowedApp", "StaticLib", "SharedLib" }, + + valid_languages = { "C", "C++", "C#"}, + + valid_tools = { + cc = { "msc" }, + dotnet = { "msnet" }, + }, + + onsolution = function(sln) + premake.generate(sln, "%%.sln", vstudio.sln2005.generate) + end, + + onproject = function(prj) + if premake.isdotnetproject(prj) then + premake.generate(prj, "%%.csproj", vstudio.cs2005.generate) + premake.generate(prj, "%%.csproj.user", vstudio.cs2005.generate_user) + else + premake.generate(prj, "%%.vcxproj", premake.vs2010_vcxproj) + premake.generate(prj, "%%.vcxproj.user", premake.vs2010_vcxproj_user) + premake.generate(prj, "%%.vcxproj.filters", vstudio.vc2010.generate_filters) + end + end, + + + oncleansolution = premake.vstudio.cleansolution, + oncleanproject = premake.vstudio.cleanproject, + oncleantarget = premake.vstudio.cleantarget, + + vstudio = { + solutionVersion = "12", + targetFramework = "4.7", + toolsVersion = "16.0", + shortSlnVersion = "16", + } + } diff --git a/src/actions/vstudio/vs2022.lua b/src/actions/vstudio/vs2022.lua new file mode 100644 index 00000000..0df82ae1 --- /dev/null +++ b/src/actions/vstudio/vs2022.lua @@ -0,0 +1,58 @@ +-- +-- vs2022.lua +-- Baseline support for Visual Studio 2022. +-- Copyright (c) 2013 Jason Perkins and the Premake project +-- + + premake.vstudio.vc2022 = {} + local vc2022 = premake.vstudio.vc2022 + local vstudio = premake.vstudio + + +--- +-- Register a command-line action for Visual Studio 2022. 
+--- + + newaction + { + trigger = "vs2022", + shortname = "Visual Studio 2022", + description = "Generate Microsoft Visual Studio 2022 project files", + os = "windows", + + valid_kinds = { "ConsoleApp", "WindowedApp", "StaticLib", "SharedLib" }, + + valid_languages = { "C", "C++", "C#"}, + + valid_tools = { + cc = { "msc" }, + dotnet = { "msnet" }, + }, + + onsolution = function(sln) + premake.generate(sln, "%%.sln", vstudio.sln2005.generate) + end, + + onproject = function(prj) + if premake.isdotnetproject(prj) then + premake.generate(prj, "%%.csproj", vstudio.cs2005.generate) + premake.generate(prj, "%%.csproj.user", vstudio.cs2005.generate_user) + else + premake.generate(prj, "%%.vcxproj", premake.vs2010_vcxproj) + premake.generate(prj, "%%.vcxproj.user", premake.vs2010_vcxproj_user) + premake.generate(prj, "%%.vcxproj.filters", vstudio.vc2010.generate_filters) + end + end, + + + oncleansolution = premake.vstudio.cleansolution, + oncleanproject = premake.vstudio.cleanproject, + oncleantarget = premake.vstudio.cleantarget, + + vstudio = { + solutionVersion = "12", + targetFramework = "4.7", + toolsVersion = "17.0", + shortSlnVersion = "17", + } + } diff --git a/src/host/os_getversion.c b/src/host/os_getversion.c index 446848ff..c0787b52 100755 --- a/src/host/os_getversion.c +++ b/src/host/os_getversion.c @@ -63,82 +63,148 @@ int os_getversion(lua_State* L) SYSTEM_INFO getsysteminfo() { - typedef void (WINAPI *GetNativeSystemInfoSig)(LPSYSTEM_INFO); - GetNativeSystemInfoSig nativeSystemInfo = (GetNativeSystemInfoSig) - GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetNativeSystemInfo"); + static SYSTEM_INFO systemInfo; + HMODULE hKrnl32 = GetModuleHandle(TEXT("kernel32")); + memset(&systemInfo, 0, sizeof(systemInfo)); + if (hKrnl32) + { + typedef void (WINAPI* GetNativeSystemInfoSig)(LPSYSTEM_INFO); + GetNativeSystemInfoSig nativeSystemInfo = (GetNativeSystemInfoSig)GetProcAddress(hKrnl32, "GetNativeSystemInfo"); - SYSTEM_INFO systemInfo = {{0}}; - if ( 
nativeSystemInfo ) nativeSystemInfo(&systemInfo); - else GetSystemInfo(&systemInfo); + if (nativeSystemInfo) + nativeSystemInfo(&systemInfo); + else + GetSystemInfo(&systemInfo); + } return systemInfo; } -void getversion(struct OsVersionInfo* info) -{ - OSVERSIONINFOEX versionInfo = {0}; - - versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); - GetVersionEx((OSVERSIONINFO*)&versionInfo); - - info->majorversion = versionInfo.dwMajorVersion; - info->minorversion = versionInfo.dwMinorVersion; - info->revision = versionInfo.wServicePackMajor; +#ifndef NT_SUCCESS +# define NT_SUCCESS(x) ((x) >= 0) +#endif - if (versionInfo.dwMajorVersion == 5 && versionInfo.dwMinorVersion == 0) +OSVERSIONINFOEXW const * GetOSVersionInfo() +{ + static OSVERSIONINFOEXW* posvix = NULL; + if (!posvix) { - info->description = "Windows 2000"; + static OSVERSIONINFOEXW osvix = { sizeof(OSVERSIONINFOEXW), 0, 0, 0, 0,{ 0 } }; // not an error, this has to be the W variety! + static LONG(WINAPI * RtlGetVersion)(OSVERSIONINFOEXW*) = NULL; + static HMODULE hNtDll = NULL; + hNtDll = GetModuleHandle(TEXT("ntdll.dll")); + if (hNtDll) + { + *(FARPROC*)&RtlGetVersion = GetProcAddress(hNtDll, "RtlGetVersion"); + if (NULL != RtlGetVersion) + { + if (NT_SUCCESS(RtlGetVersion(&osvix))) + { + posvix = &osvix; } - else if (versionInfo.dwMajorVersion == 5 && versionInfo.dwMinorVersion == 1) - { - info->description = "Windows XP"; } - else if (versionInfo.dwMajorVersion == 5 && versionInfo.dwMinorVersion == 2) - { - SYSTEM_INFO systemInfo = getsysteminfo(); - if (versionInfo.wProductType == VER_NT_WORKSTATION && - systemInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - { - info->description = "Windows XP Professional x64"; } - else if (versionInfo.wSuiteMask & VER_SUITE_WH_SERVER) - { - info->description = "Windows Home Server"; } - else if (GetSystemMetrics(SM_SERVERR2) == 0) + return posvix; +} + +void getversion(struct OsVersionInfo* info) +{ + static OSVERSIONINFOEXW const* posvix = NULL; 
+ static struct OsVersionInfo s_info; + s_info.majorversion = 0; + s_info.minorversion = 0; + s_info.revision = 0; + s_info.description = "Windows"; + + if (!posvix) + { + posvix = GetOSVersionInfo(); + if (posvix) { - info->description = "Windows Server 2003"; - } + s_info.majorversion = posvix->dwMajorVersion; + s_info.minorversion = posvix->dwMinorVersion; + s_info.revision = posvix->wServicePackMajor; + switch (posvix->dwMajorVersion) + { + case 5: + switch (posvix->dwMinorVersion) + { + case 0: + s_info.description = "Windows 2000"; + break; + case 1: + s_info.description = "Windows XP"; + break; + case 2: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows XP x64"; else + if (posvix->wSuiteMask == VER_SUITE_WH_SERVER) + s_info.description = "Windows Home Server"; + else { - info->description = "Windows Server 2003 R2"; + if (GetSystemMetrics(SM_SERVERR2) == 0) + s_info.description = "Windows Server 2003"; + else + s_info.description = "Windows Server 2003 R2"; } + break; + default: + s_info.description = "Windows [5.x]"; + break; } - else if (versionInfo.dwMajorVersion == 6 && versionInfo.dwMinorVersion == 0) - { - if (versionInfo.wProductType == VER_NT_WORKSTATION) + break; + case 6: + switch (posvix->dwMinorVersion) { - info->description = "Windows Vista"; - } + case 0: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows Vista"; else - { - info->description = "Windows Server 2008"; - } + s_info.description = "Windows Server 2008"; + break; + case 1: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows 7"; + else + s_info.description = "Windows Server 2008 R2"; + break; + case 2: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows 8"; + else + s_info.description = "Windows Server 2012"; + break; + case 3: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows 8.1"; + else + s_info.description = "Windows Server 2012 
R2"; + break; + default: + s_info.description = "Windows [6.x]"; + break; } - else if (versionInfo.dwMajorVersion == 6 && versionInfo.dwMinorVersion == 1 ) - { - if (versionInfo.wProductType != VER_NT_WORKSTATION) + break; + case 10: + switch (posvix->dwMinorVersion) { - info->description = "Windows Server 2008 R2"; - } + case 0: + if (posvix->wProductType == VER_NT_WORKSTATION) + s_info.description = "Windows 10"; else - { - info->description = "Windows 7"; + s_info.description = "Windows Server 2016/2019"; + break; + default: + s_info.description = "Windows [10.x]"; + break; } + break; + } } - else - { - info->description = "Windows"; } + + memmove(info, &s_info, sizeof(struct OsVersionInfo)); } /*************************************************************/ diff --git a/src/host/os_is64bit.c b/src/host/os_is64bit.c index 31347512..e572dbc0 100755 --- a/src/host/os_is64bit.c +++ b/src/host/os_is64bit.c @@ -6,14 +6,27 @@ #include "premake.h" +#if PLATFORM_WINDOWS +typedef BOOL(WINAPI* WowFuncSig)(HANDLE, PBOOL); +#endif + int os_is64bit(lua_State* L) { +#if PLATFORM_WINDOWS + HMODULE hKrnl32 = GetModuleHandle(TEXT("kernel32")); +#endif + if (sizeof(void*) == 8) // our premake build is 64-bit, so the runtime environment must be also (at least) 64-bit ... + { + lua_pushboolean(L, 1); + return 1; + } // If this code returns true, then the platform is 64-bit. If it // returns false, the platform might still be 64-bit, but more // checking will need to be done on the Lua side of things. 
#if PLATFORM_WINDOWS - typedef BOOL (WINAPI* WowFuncSig)(HANDLE, PBOOL); - WowFuncSig func = (WowFuncSig)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process"); + if (hKrnl32) + { + WowFuncSig func = (WowFuncSig)GetProcAddress(hKrnl32, "IsWow64Process"); if (func) { BOOL isWow = FALSE; @@ -21,6 +34,7 @@ int os_is64bit(lua_State* L) { lua_pushboolean(L, isWow); return 1; + } } } #endif diff --git a/src/host/os_match.c b/src/host/os_match.c index 5ccc04d9..e307ab07 100644 --- a/src/host/os_match.c +++ b/src/host/os_match.c @@ -8,6 +8,17 @@ #include #include "premake.h" +static int skip_dot_entries(const char* name) +{ + if (name[0] == '.') + { + if (name[1] == '\0') + return 1; + if (name[1] == '.' && name[2] == '\0') + return 1; + } + return 0; +} #if PLATFORM_WINDOWS @@ -25,9 +36,16 @@ int os_matchstart(lua_State* L) { const char* mask = luaL_checkstring(L, 1); MatchInfo* m = (MatchInfo*)malloc(sizeof(MatchInfo)); - m->handle = FindFirstFile(mask, &m->entry); + if (m) + { + m->handle = FindFirstFile(mask, &m->entry); /* error handling happens in os_matchnext() below */ m->is_first = 1; lua_pushlightuserdata(L, m); + } + else + { + lua_pushnil(L); + } return 1; } @@ -57,9 +75,8 @@ int os_matchisfile(lua_State* L) int os_matchnext(lua_State* L) { MatchInfo* m = (MatchInfo*)lua_touserdata(L, 1); - if (m->handle == INVALID_HANDLE_VALUE) { + if (m->handle == INVALID_HANDLE_VALUE) return 0; - } while (m) /* loop forever */ { @@ -70,6 +87,10 @@ int os_matchnext(lua_State* L) } m->is_first = 0; + /* Ignore the directory entries for . and .. only */ + if (m->entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + if (skip_dot_entries(m->entry.cFileName)) + continue; lua_pushboolean(L, 1); return 1; } @@ -167,6 +188,8 @@ int os_matchnext(lua_State* L) while (m->entry != NULL) { const char* name = m->entry->d_name; + /* Ignore the directory entries for . and .. 
only */ + if (!skip_dot_entries(name)) if (fnmatch(m->mask, name, 0) == 0) { lua_pushboolean(L, 1); diff --git a/src/host/os_uuid.c b/src/host/os_uuid.c index 28be4e82..e3bee6b8 100644 --- a/src/host/os_uuid.c +++ b/src/host/os_uuid.c @@ -15,7 +15,9 @@ int os_uuid(lua_State* L) char uuid[38]; #if PLATFORM_WINDOWS - CoCreateGuid((GUID*)bytes); + HRESULT hr = CoCreateGuid((GUID*)bytes); + if (FAILED(hr)) + return 0; #else int result; diff --git a/src/host/path_translate.c b/src/host/path_translate.c index 8996b37c..0833341b 100644 --- a/src/host/path_translate.c +++ b/src/host/path_translate.c @@ -43,9 +43,11 @@ int path_translate(lua_State* L) lua_newtable(L); lua_pushnil(L); while (lua_next(L, 1)) { - const char* value = luaL_checkstring(L, 4); - translate(buffer, value, sep[0]); - lua_pop(L, 1); + const char* key; + lua_pushvalue(L, 4); // copy the key + key = luaL_checkstring(L, 5); + translate(buffer, key, sep[0]); + lua_pop(L, 2); lua_pushstring(L, buffer); lua_rawseti(L, -3, ++i); diff --git a/src/host/premake.c b/src/host/premake.c index 9e30d509..ac4bc04d 100755 --- a/src/host/premake.c +++ b/src/host/premake.c @@ -322,12 +322,22 @@ static int load_file_scripts(lua_State* L) if (lua_isnil(L, -1)) { + /* call os.pathsearch() to locate _premake_main.lua */ + lua_pushcfunction(L, os_pathsearch); + lua_pushstring(L, "_premake_main.lua"); + lua_pushstring(L, "src"); + lua_pushstring(L, getenv("PREMAKE_PATH")); + lua_call(L, 3, 1); + + if (lua_isnil(L, -1)) + { printf(ERROR_MESSAGE, - "Unable to find _premake_main.lua; use /scripts option when in debug mode!\n" + "Unable to find _premake_main.lua; use --scripts option when in debug mode!\n" "Please refer to the documentation (or build in release mode instead)." 
); return !OKAY; } + } /* run the bootstrapping script */ scripts_path = lua_tostring(L, -1); @@ -356,6 +366,10 @@ static int load_file_scripts(lua_State* L) } } +extern const char* builtin_script_fnames[]; + +#define luaL_dobuffer(L, s, n) \ + (luaL_loadbuffer(L, s, strlen(s), n) || lua_pcall(L, 0, LUA_MULTRET, 0)) /** * When running in release mode, the scripts are loaded from a static data @@ -367,16 +381,31 @@ static int load_builtin_scripts(lua_State* L) int i; for (i = 0; builtin_scripts[i]; ++i) { + if (builtin_script_fnames[i]) + { + if (luaL_dobuffer(L, builtin_scripts[i], builtin_script_fnames[i]) != OKAY) + { + printf(ERROR_MESSAGE, lua_tostring(L, -1)); + return !OKAY; + } + } + else + { if (luaL_dostring(L, builtin_scripts[i]) != OKAY) { printf(ERROR_MESSAGE, lua_tostring(L, -1)); return !OKAY; } } + } + + /* in release mode, also show full traceback on all errors */ + lua_getglobal(L, "debug"); + lua_getfield(L, -1, "traceback"); /* hand off control to the scripts */ lua_getglobal(L, "_premake_main"); - if (lua_pcall(L, 0, 1, 0) != OKAY) + if (lua_pcall(L, 0, 1, -2) != OKAY) { printf(ERROR_MESSAGE, lua_tostring(L, -1)); return !OKAY; diff --git a/tests/actions/vstudio/sln2005/header.lua b/tests/actions/vstudio/sln2005/header.lua index a9596770..61aae752 100755 --- a/tests/actions/vstudio/sln2005/header.lua +++ b/tests/actions/vstudio/sln2005/header.lua @@ -76,6 +76,14 @@ Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 ]] +--[[ +VS 2013 seems to add something like: + +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 + +which don't seem to be mandatory, though. 
+]] end function suite.On2015() @@ -83,14 +91,65 @@ Microsoft Visual Studio Solution File, Format Version 12.00 prepare() test.capture [[ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2015 +# Visual Studio 14 ]] --[[ -VS2015 seems to add: +VS 2015 seems to add something like: VisualStudioVersion = 14.0.23107.0 MinimumVisualStudioVersion = 10.0.40219.1 +which don't seem to be mandatory, though. +]] + end + + function suite.On2017() + _ACTION = "vs2017" + prepare() + test.capture [[ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 + ]] +--[[ +VS 2017 seems to add something like: + +VisualStudioVersion = 15.0.26228.4 +MinimumVisualStudioVersion = 10.0.40219.1 + +which don't seem to be mandatory, though. +]] + end + + function suite.On2019() + _ACTION = "vs2019" + prepare() + test.capture [[ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 16 + ]] +--[[ +VS 2019 seems to add something like: + +VisualStudioVersion = 16.0.29411.108 +MinimumVisualStudioVersion = 10.0.40219.1 + +which don't seem to be mandatory, though. +]] + end + + function suite.On2022() + _ACTION = "vs2022" + prepare() + test.capture [[ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 17 + ]] +--[[ +VS 2022 seems to add something like: + +VisualStudioVersion = 17.0.0.0 +MinimumVisualStudioVersion = 10.0.40219.1 + which don't seem to be mandatory, though. 
]] end diff --git a/tests/actions/vstudio/vc2010/test_config_props.lua b/tests/actions/vstudio/vc2010/test_config_props.lua index 2970ad48..a5b05e1e 100644 --- a/tests/actions/vstudio/vc2010/test_config_props.lua +++ b/tests/actions/vstudio/vc2010/test_config_props.lua @@ -89,3 +89,42 @@ ]] end + + function suite.structureIsCorrect_onDefaultValues_on2017() + _ACTION = "vs2017" + prepare() + test.capture [[ + + Application + true + MultiByte + v141 + + ]] + end + + function suite.structureIsCorrect_onDefaultValues_on2019() + _ACTION = "vs2019" + prepare() + test.capture [[ + + Application + true + MultiByte + v142 + + ]] + end + + function suite.structureIsCorrect_onDefaultValues_on2022() + _ACTION = "vs2022" + prepare() + test.capture [[ + + Application + true + MultiByte + v143 + + ]] + end diff --git a/tests/actions/vstudio/vc2010/test_files.lua b/tests/actions/vstudio/vc2010/test_files.lua index 6cd1a164..330c35ef 100755 --- a/tests/actions/vstudio/vc2010/test_files.lua +++ b/tests/actions/vstudio/vc2010/test_files.lua @@ -48,6 +48,7 @@ test.capture [[ + CompileAsC ]] @@ -86,6 +87,7 @@ test.capture [[ + CompileAsC ]] diff --git a/tests/testfx.lua b/tests/testfx.lua index 83420861..5a0f33be 100644 --- a/tests/testfx.lua +++ b/tests/testfx.lua @@ -23,7 +23,7 @@ function test.string_does_not_contain(buffer, expected) if string.find(buffer,expected) then - test.fail("\n==Fail==: Did not expected to find :\n%s\nyet it was found in buffer:\n%s\n", expected,buffer) + test.fail("\n==Fail==: Did not expect to find :\n%s\nyet it was found in buffer:\n%s\n", expected,buffer) end end From d6c8633078270f5eeb317dd5f74f3ea13019b0df Mon Sep 17 00:00:00 2001 From: Oliver Schneider Date: Thu, 10 Feb 2022 00:35:41 +0100 Subject: [PATCH 3/4] Actually also backporting LuaSrcDiet which can save a lot in binary size --- scripts/embed.lua | 41 +- scripts/luasrcdiet/COPYRIGHT | 45 + scripts/luasrcdiet/COPYRIGHT_Lua51 | 34 + scripts/luasrcdiet/LuaSrcDiet.lua | 4615 
++++++++++++++++++++++++++ scripts/luasrcdiet/README.LuaSrcDiet | 140 + scripts/luasrcdiet/README.premake | 12 + 6 files changed, 4865 insertions(+), 22 deletions(-) create mode 100644 scripts/luasrcdiet/COPYRIGHT create mode 100644 scripts/luasrcdiet/COPYRIGHT_Lua51 create mode 100644 scripts/luasrcdiet/LuaSrcDiet.lua create mode 100644 scripts/luasrcdiet/README.LuaSrcDiet create mode 100644 scripts/luasrcdiet/README.premake diff --git a/scripts/embed.lua b/scripts/embed.lua index 014d2c5e..b02adec5 100644 --- a/scripts/embed.lua +++ b/scripts/embed.lua @@ -5,40 +5,29 @@ -- issues in Mac OS X Universal builds. -- - local function stripfile(fname) - local f = io.open(fname) - local s = assert(f:read("*a")) - f:close() - - -- strip tabs - s = s:gsub("[\t]", "") + local raw_sum = 0 + local trim_sum = 0 + local function stripfile(fname) + dofile("scripts/luasrcdiet/LuaSrcDiet.lua") + -- Let LuaSrcDiet do its job + local s,l = get_slim_luasrc(fname) + -- Now do some cleanup so we can write these out as C strings -- strip any CRs s = s:gsub("[\r]", "") - -- strip out block comments - s = s:gsub("[^\"']%-%-%[%[.-%]%]", "") - s = s:gsub("[^\"']%-%-%[=%[.-%]=%]", "") - s = s:gsub("[^\"']%-%-%[==%[.-%]==%]", "") - - -- strip out inline comments - s = s:gsub("\n%-%-[^\n]*", "\n") + -- overall counters + raw_sum = raw_sum + l:len() + trim_sum = trim_sum + s:len() -- escape backslashes s = s:gsub("\\", "\\\\") - -- strip duplicate line feeds - s = s:gsub("\n+", "\n") - - -- strip out leading comments - s = s:gsub("^%-%-[^\n]*\n", "") - -- escape line feeds s = s:gsub("\n", "\\n") -- escape double quote marks s = s:gsub("\"", "\\\"") - return s end @@ -77,6 +66,9 @@ function doembed() + raw_sum = 0 + trim_sum = 0 + fnames = "const char* builtin_script_fnames[] = {" -- load the manifest of script files scripts = dofile("src/_manifest.lua") @@ -93,9 +85,14 @@ for i,fn in ipairs(scripts) do print(fn) local s = stripfile("src/" .. fn) + fnames = fnames .. "\n" .. "\t\"@" .. fn .. 
"\"," writefile(out, fn, s) end - out:write("\t0\n};\n"); + out:write("\t0\n};\n\n"); + out:write(fnames); + out:write("\n\t0\n};\n"); + out:close() + print(string.format("Lua scripts trimmed down to %2.1f%% of original size (%d/%d)", (trim_sum / raw_sum) * 100, trim_sum, raw_sum)) end diff --git a/scripts/luasrcdiet/COPYRIGHT b/scripts/luasrcdiet/COPYRIGHT new file mode 100644 index 00000000..4afe0e37 --- /dev/null +++ b/scripts/luasrcdiet/COPYRIGHT @@ -0,0 +1,45 @@ +LuaSrcDiet License +------------------ + +LuaSrcDiet is licensed under the terms of the MIT license reproduced +below. This means that LuaSrcDiet is free software and can be used for +both academic and commercial purposes at absolutely no cost. + +Think of LuaSrcDiet as a compiler or a text filter; whatever that is +processed by LuaSrcDiet is not affected by its license. It does not add +anything new into your source code; it only transforms code that already +exist. + +Hence, there is no need to tag this license onto Lua programs that are +only processed. Given the liberal terms of this kind of license, the +primary purpose is just to claim authorship of LuaSrcDiet. + +Parts of LuaSrcDiet is based on Lua 5 code. See the file COPYRIGHT_Lua51 +(Lua 5.1.4) for Lua 5's license. + +=============================================================================== + +Copyright (C) 2005-2008,2011 Kein-Hong Man +Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=============================================================================== + +(end of COPYRIGHT) diff --git a/scripts/luasrcdiet/COPYRIGHT_Lua51 b/scripts/luasrcdiet/COPYRIGHT_Lua51 new file mode 100644 index 00000000..3a53e741 --- /dev/null +++ b/scripts/luasrcdiet/COPYRIGHT_Lua51 @@ -0,0 +1,34 @@ +Lua License +----------- + +Lua is licensed under the terms of the MIT license reproduced below. +This means that Lua is free software and can be used for both academic +and commercial purposes at absolutely no cost. + +For details and rationale, see http://www.lua.org/license.html . + +=============================================================================== + +Copyright (C) 1994-2008 Lua.org, PUC-Rio. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=============================================================================== + +(end of COPYRIGHT) diff --git a/scripts/luasrcdiet/LuaSrcDiet.lua b/scripts/luasrcdiet/LuaSrcDiet.lua new file mode 100644 index 00000000..4feed224 --- /dev/null +++ b/scripts/luasrcdiet/LuaSrcDiet.lua @@ -0,0 +1,4615 @@ +#!/usr/bin/env lua +--[[-------------------------------------------------------------------- + + LuaSrcDiet + Compresses Lua source code by removing unnecessary characters. + For Lua 5.1.x source code. + + Copyright (c) 2008,2011,2012 Kein-Hong Man + The COPYRIGHT file describes the conditions + under which this software may be distributed. 
+ +----------------------------------------------------------------------]] + +--[[-------------------------------------------------------------------- +-- NOTES: +-- * Remember to update version and date information below (MSG_TITLE) +-- * TODO: passing data tables around is a horrific mess +-- * TODO: to implement pcall() to properly handle lexer etc. errors +-- * TODO: need some automatic testing for a semblance of sanity +-- * TODO: the plugin module is highly experimental and unstable +----------------------------------------------------------------------]] + +-- standard libraries, functions +local string = string +local math = math +local table = table +local require = require +local print = print +local sub = string.sub +local gmatch = string.gmatch +local match = string.match + +-- modules incorporated as preload functions follows +local preload = package.preload +local base = _G + +local plugin_info = { + html = "html generates a HTML file for checking globals", + sloc = "sloc calculates SLOC for given source file", +} + +local p_embedded = { + 'html', + 'sloc', +} + +-- preload function for module llex +preload.llex = +function() +--start of inserted module +module "llex" + +local string = base.require "string" +local find = string.find +local match = string.match +local sub = string.sub + +---------------------------------------------------------------------- +-- initialize keyword list, variables +---------------------------------------------------------------------- + +local kw = {} +for v in string.gmatch([[ +and break do else elseif end false for function if in +local nil not or repeat return then true until while]], "%S+") do + kw[v] = true +end + +-- see init() for module variables (externally visible): +-- tok, seminfo, tokln + +local z, -- source stream + sourceid, -- name of source + I, -- position of lexer + buff, -- buffer for strings + ln -- line number + +---------------------------------------------------------------------- +-- add 
information to token listing +---------------------------------------------------------------------- + +local function addtoken(token, info) + local i = #tok + 1 + tok[i] = token + seminfo[i] = info + tokln[i] = ln +end + +---------------------------------------------------------------------- +-- handles line number incrementation and end-of-line characters +---------------------------------------------------------------------- + +local function inclinenumber(i, is_tok) + local sub = sub + local old = sub(z, i, i) + i = i + 1 -- skip '\n' or '\r' + local c = sub(z, i, i) + if (c == "\n" or c == "\r") and (c ~= old) then + i = i + 1 -- skip '\n\r' or '\r\n' + old = old..c + end + if is_tok then addtoken("TK_EOL", old) end + ln = ln + 1 + I = i + return i +end + +---------------------------------------------------------------------- +-- initialize lexer for given source _z and source name _sourceid +---------------------------------------------------------------------- + +function init(_z, _sourceid) + z = _z -- source + sourceid = _sourceid -- name of source + I = 1 -- lexer's position in source + ln = 1 -- line number + tok = {} -- lexed token list* + seminfo = {} -- lexed semantic information list* + tokln = {} -- line numbers for messages* + -- (*) externally visible thru' module + -------------------------------------------------------------------- + -- initial processing (shbang handling) + -------------------------------------------------------------------- + local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)") + if p then -- skip first line + I = I + #q + addtoken("TK_COMMENT", q) + if #r > 0 then inclinenumber(I, true) end + end +end + +---------------------------------------------------------------------- +-- returns a chunk name or id, no truncation for long names +---------------------------------------------------------------------- + +function chunkid() + if sourceid and match(sourceid, "^[=@]") then + return sub(sourceid, 2) -- remove first char + end + 
return "[string]" +end + +---------------------------------------------------------------------- +-- formats error message and throws error +-- * a simplified version, does not report what token was responsible +---------------------------------------------------------------------- + +function errorline(s, line) + local e = error or base.error + e(string.format("%s:%d: %s", chunkid(), line or ln, s)) +end +local errorline = errorline + +------------------------------------------------------------------------ +-- count separators ("=") in a long string delimiter +------------------------------------------------------------------------ + +local function skip_sep(i) + local sub = sub + local s = sub(z, i, i) + i = i + 1 + local count = #match(z, "=*", i) + i = i + count + I = i + return (sub(z, i, i) == s) and count or (-count) - 1 +end + +---------------------------------------------------------------------- +-- reads a long string or long comment +---------------------------------------------------------------------- + +local function read_long_string(is_str, sep) + local i = I + 1 -- skip 2nd '[' + local sub = sub + local c = sub(z, i, i) + if c == "\r" or c == "\n" then -- string starts with a newline? 
+ i = inclinenumber(i) -- skip it + end + while true do + local p, q, r = find(z, "([\r\n%]])", i) -- (long range match) + if not p then + errorline(is_str and "unfinished long string" or + "unfinished long comment") + end + i = p + if r == "]" then -- delimiter test + if skip_sep(i) == sep then + buff = sub(z, buff, I) + I = I + 1 -- skip 2nd ']' + return buff + end + i = I + else -- newline + buff = buff.."\n" + i = inclinenumber(i) + end + end--while +end + +---------------------------------------------------------------------- +-- reads a string +---------------------------------------------------------------------- + +local function read_string(del) + local i = I + local find = find + local sub = sub + while true do + local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range match) + if p then + if r == "\n" or r == "\r" then + errorline("unfinished string") + end + i = p + if r == "\\" then -- handle escapes + i = i + 1 + r = sub(z, i, i) + if r == "" then break end -- (EOZ error) + p = find("abfnrtv\n\r", r, 1, true) + ------------------------------------------------------ + if p then -- special escapes + if p > 7 then + i = inclinenumber(i) + else + i = i + 1 + end + ------------------------------------------------------ + elseif find(r, "%D") then -- other non-digits + i = i + 1 + ------------------------------------------------------ + else -- \xxx sequence + local p, q, s = find(z, "^(%d%d?%d?)", i) + i = q + 1 + if s + 1 > 256 then -- UCHAR_MAX + errorline("escape sequence too large") + end + ------------------------------------------------------ + end--if p + else + i = i + 1 + if r == del then -- ending delimiter + I = i + return sub(z, buff, i - 1) -- return string + end + end--if r + else + break -- (error) + end--if p + end--while + errorline("unfinished string") +end + +------------------------------------------------------------------------ +-- main lexer function +------------------------------------------------------------------------ + 
+function llex() + local find = find + local match = match + while true do--outer + local i = I + -- inner loop allows break to be used to nicely section tests + while true do--inner + ---------------------------------------------------------------- + local p, _, r = find(z, "^([_%a][_%w]*)", i) + if p then + I = i + #r + if kw[r] then + addtoken("TK_KEYWORD", r) -- reserved word (keyword) + else + addtoken("TK_NAME", r) -- identifier + end + break -- (continue) + end + ---------------------------------------------------------------- + local p, _, r = find(z, "^(%.?)%d", i) + if p then -- numeral + if r == "." then i = i + 1 end + local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i) + i = q + 1 + if #r == 1 then -- optional exponent + if match(z, "^[%+%-]", i) then -- optional sign + i = i + 1 + end + end + local _, q = find(z, "^[_%w]*", i) + I = q + 1 + local v = sub(z, p, q) -- string equivalent + if not base.tonumber(v) then -- handles hex test also + errorline("malformed number") + end + addtoken("TK_NUMBER", v) + break -- (continue) + end + ---------------------------------------------------------------- + local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i) + if p then + if t == "\n" or t == "\r" then -- newline + inclinenumber(i, true) + else + I = q + 1 -- whitespace + addtoken("TK_SPACE", r) + end + break -- (continue) + end + ---------------------------------------------------------------- + local r = match(z, "^%p", i) + if r then + buff = i + local p = find("-[\"\'.=<>~", r, 1, true) + if p then + -- two-level if block for punctuation/symbols + -------------------------------------------------------- + if p <= 2 then + if p == 1 then -- minus + local c = match(z, "^%-%-(%[?)", i) + if c then + i = i + 2 + local sep = -1 + if c == "[" then + sep = skip_sep(i) + end + if sep >= 0 then -- long comment + addtoken("TK_LCOMMENT", read_long_string(false, sep)) + else -- short comment + I = find(z, "[\n\r]", i) or (#z + 1) + addtoken("TK_COMMENT", sub(z, buff, I - 
1)) + end + break -- (continue) + end + -- (fall through for "-") + else -- [ or long string + local sep = skip_sep(i) + if sep >= 0 then + addtoken("TK_LSTRING", read_long_string(true, sep)) + elseif sep == -1 then + addtoken("TK_OP", "[") + else + errorline("invalid long string delimiter") + end + break -- (continue) + end + -------------------------------------------------------- + elseif p <= 5 then + if p < 5 then -- strings + I = i + 1 + addtoken("TK_STRING", read_string(r)) + break -- (continue) + end + r = match(z, "^%.%.?%.?", i) -- .|..|... dots + -- (fall through) + -------------------------------------------------------- + else -- relational + r = match(z, "^%p=?", i) + -- (fall through) + end + end + I = i + #r + addtoken("TK_OP", r) -- for other symbols, fall through + break -- (continue) + end + ---------------------------------------------------------------- + local r = sub(z, i, i) + if r ~= "" then + I = i + 1 + addtoken("TK_OP", r) -- other single-char tokens + break + end + addtoken("TK_EOS", "") -- end of stream, + return -- exit here + ---------------------------------------------------------------- + end--while inner + end--while outer +end +--end of inserted module +end + +-- preload function for module lparser +preload.lparser = +function() +--start of inserted module +module "lparser" + +local string = base.require "string" + +--[[-------------------------------------------------------------------- +-- variable and data structure initialization +----------------------------------------------------------------------]] + +---------------------------------------------------------------------- +-- initialization: main variables +---------------------------------------------------------------------- + +local toklist, -- grammar-only token tables (token table, + seminfolist, -- semantic information table, line number + toklnlist, -- table, cross-reference table) + xreflist, + tpos, -- token position + + line, -- start line # for error messages + 
lastln, -- last line # for ambiguous syntax chk + tok, seminfo, ln, xref, -- token, semantic info, line + nameref, -- proper position of token + fs, -- current function state + top_fs, -- top-level function state + + globalinfo, -- global variable information table + globallookup, -- global variable name lookup table + localinfo, -- local variable information table + ilocalinfo, -- inactive locals (prior to activation) + ilocalrefs, -- corresponding references to activate + statinfo -- statements labeled by type + +-- forward references for local functions +local explist1, expr, block, exp1, body, chunk + +---------------------------------------------------------------------- +-- initialization: data structures +---------------------------------------------------------------------- + +local gmatch = string.gmatch + +local block_follow = {} -- lookahead check in chunk(), returnstat() +for v in gmatch("else elseif end until ", "%S+") do + block_follow[v] = true +end + +local binopr_left = {} -- binary operators, left priority +local binopr_right = {} -- binary operators, right priority +for op, lt, rt in gmatch([[ +{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7} +{^ 10 9}{.. 
5 4} +{~= 3 3}{== 3 3} +{< 3 3}{<= 3 3}{> 3 3}{>= 3 3} +{and 2 2}{or 1 1} +]], "{(%S+)%s(%d+)%s(%d+)}") do + binopr_left[op] = lt + 0 + binopr_right[op] = rt + 0 +end + +local unopr = { ["not"] = true, ["-"] = true, + ["#"] = true, } -- unary operators +local UNARY_PRIORITY = 8 -- priority for unary operators + +--[[-------------------------------------------------------------------- +-- support functions +----------------------------------------------------------------------]] + +---------------------------------------------------------------------- +-- formats error message and throws error (duplicated from llex) +-- * a simplified version, does not report what token was responsible +---------------------------------------------------------------------- + +local function errorline(s, line) + local e = error or base.error + e(string.format("(source):%d: %s", line or ln, s)) +end + +---------------------------------------------------------------------- +-- handles incoming token, semantic information pairs +-- * NOTE: 'nextt' is named 'next' originally +---------------------------------------------------------------------- + +-- reads in next token +local function nextt() + lastln = toklnlist[tpos] + tok, seminfo, ln, xref + = toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos] + tpos = tpos + 1 +end + +-- peek at next token (single lookahead for table constructor) +local function lookahead() + return toklist[tpos] +end + +---------------------------------------------------------------------- +-- throws a syntax error, or if token expected is not there +---------------------------------------------------------------------- + +local function syntaxerror(msg) + local tok = tok + if tok ~= "<number>" and tok ~= "<string>" then + if tok == "<name>" then tok = seminfo end + tok = "'"..tok.."'" + end + errorline(msg.." 
near "..tok) +end + +local function error_expected(token) + syntaxerror("'"..token.."' expected") +end + +---------------------------------------------------------------------- +-- tests for a token, returns outcome +-- * return value changed to boolean +---------------------------------------------------------------------- + +local function testnext(c) + if tok == c then nextt(); return true end +end + +---------------------------------------------------------------------- +-- check for existence of a token, throws error if not found +---------------------------------------------------------------------- + +local function check(c) + if tok ~= c then error_expected(c) end +end + +---------------------------------------------------------------------- +-- verify existence of a token, then skip it +---------------------------------------------------------------------- + +local function checknext(c) + check(c); nextt() +end + +---------------------------------------------------------------------- +-- throws error if condition not matched +---------------------------------------------------------------------- + +local function check_condition(c, msg) + if not c then syntaxerror(msg) end +end + +---------------------------------------------------------------------- +-- verifies token conditions are met or else throw error +---------------------------------------------------------------------- + +local function check_match(what, who, where) + if not testnext(what) then + if where == ln then + error_expected(what) + else + syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")") + end + end +end + +---------------------------------------------------------------------- +-- expect that token is a name, return the name +---------------------------------------------------------------------- + +local function str_checkname() + check("<name>") + local ts = seminfo + nameref = xref + nextt() + return ts +end + 
+---------------------------------------------------------------------- +-- adds given string s in string pool, sets e as VK +---------------------------------------------------------------------- + +local function codestring(e, s) + e.k = "VK" +end + +---------------------------------------------------------------------- +-- consume a name token, adds it to string pool +---------------------------------------------------------------------- + +local function checkname(e) + codestring(e, str_checkname()) +end + +--[[-------------------------------------------------------------------- +-- variable (global|local|upvalue) handling +-- * to track locals and globals, variable management code needed +-- * entry point is singlevar() for variable lookups +-- * lookup tables (bl.locallist) are maintained awkwardly in the basic +-- block data structures, PLUS the function data structure (this is +-- an inelegant hack, since bl is nil for the top level of a function) +----------------------------------------------------------------------]] + +---------------------------------------------------------------------- +-- register a local variable, create local variable object, set in +-- to-activate variable list +-- * used in new_localvarliteral(), parlist(), fornum(), forlist(), +-- localfunc(), localstat() +---------------------------------------------------------------------- + +local function new_localvar(name, special) + local bl = fs.bl + local locallist + -- locate locallist in current block object or function root object + if bl then + locallist = bl.locallist + else + locallist = fs.locallist + end + -- build local variable information object and set localinfo + local id = #localinfo + 1 + localinfo[id] = { -- new local variable object + name = name, -- local variable name + xref = { nameref }, -- xref, first value is declaration + decl = nameref, -- location of declaration, = xref[1] + } + if special then -- "self" must be not be changed + localinfo[id].isself = true + 
end + -- this can override a local with the same name in the same scope + -- but first, keep it inactive until it gets activated + local i = #ilocalinfo + 1 + ilocalinfo[i] = id + ilocalrefs[i] = locallist +end + +---------------------------------------------------------------------- +-- actually activate the variables so that they are visible +-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS +-- * used in parlist(), forbody(), localfunc(), localstat(), body() +---------------------------------------------------------------------- + +local function adjustlocalvars(nvars) + local sz = #ilocalinfo + -- i goes from left to right, in order of local allocation, because + -- of something like: local a,a,a = 1,2,3 which gives a = 3 + while nvars > 0 do + nvars = nvars - 1 + local i = sz - nvars + local id = ilocalinfo[i] -- local's id + local obj = localinfo[id] + local name = obj.name -- name of local + obj.act = xref -- set activation location + ilocalinfo[i] = nil + local locallist = ilocalrefs[i] -- ref to lookup table to update + ilocalrefs[i] = nil + local existing = locallist[name] -- if existing, remove old first! 
+ if existing then -- do not overlap, set special + obj = localinfo[existing] -- form of rem, as -id + obj.rem = -id + end + locallist[name] = id -- activate, now visible to Lua + end +end + +---------------------------------------------------------------------- +-- remove (deactivate) variables in current scope (before scope exits) +-- * zap entire locallist tables since we are not allocating registers +-- * used in leaveblock(), close_func() +---------------------------------------------------------------------- + +local function removevars() + local bl = fs.bl + local locallist + -- locate locallist in current block object or function root object + if bl then + locallist = bl.locallist + else + locallist = fs.locallist + end + -- enumerate the local list at current scope and deactivate 'em + for name, id in base.pairs(locallist) do + local obj = localinfo[id] + obj.rem = xref -- set deactivation location + end +end + +---------------------------------------------------------------------- +-- creates a new local variable given a name +-- * skips internal locals (those starting with '('), so internal +-- locals never needs a corresponding adjustlocalvars() call +-- * special is true for "self" which must not be optimized +-- * used in fornum(), forlist(), parlist(), body() +---------------------------------------------------------------------- + +local function new_localvarliteral(name, special) + if string.sub(name, 1, 1) == "(" then -- can skip internal locals + return + end + new_localvar(name, special) +end + +---------------------------------------------------------------------- +-- search the local variable namespace of the given fs for a match +-- * returns localinfo index +-- * used only in singlevaraux() +---------------------------------------------------------------------- + +local function searchvar(fs, n) + local bl = fs.bl + local locallist + if bl then + locallist = bl.locallist + while locallist do + if locallist[n] then return locallist[n] end -- 
found + bl = bl.prev + locallist = bl and bl.locallist + end + end + locallist = fs.locallist + return locallist[n] or -1 -- found or not found (-1) +end + +---------------------------------------------------------------------- +-- handle locals, globals and upvalues and related processing +-- * search mechanism is recursive, calls itself to search parents +-- * used only in singlevar() +---------------------------------------------------------------------- + +local function singlevaraux(fs, n, var) + if fs == nil then -- no more levels? + var.k = "VGLOBAL" -- default is global variable + return "VGLOBAL" + else + local v = searchvar(fs, n) -- look up at current level + if v >= 0 then + var.k = "VLOCAL" + var.id = v + -- codegen may need to deal with upvalue here + return "VLOCAL" + else -- not found at current level; try upper one + if singlevaraux(fs.prev, n, var) == "VGLOBAL" then + return "VGLOBAL" + end + -- else was LOCAL or UPVAL, handle here + var.k = "VUPVAL" -- upvalue in this level + return "VUPVAL" + end--if v + end--if fs +end + +---------------------------------------------------------------------- +-- consume a name token, creates a variable (global|local|upvalue) +-- * used in prefixexp(), funcname() +---------------------------------------------------------------------- + +local function singlevar(v) + local name = str_checkname() + singlevaraux(fs, name, v) + ------------------------------------------------------------------ + -- variable tracking + ------------------------------------------------------------------ + if v.k == "VGLOBAL" then + -- if global being accessed, keep track of it by creating an object + local id = globallookup[name] + if not id then + id = #globalinfo + 1 + globalinfo[id] = { -- new global variable object + name = name, -- global variable name + xref = { nameref }, -- xref, first value is declaration + } + globallookup[name] = id -- remember it + else + local obj = globalinfo[id].xref + obj[#obj + 1] = nameref -- add xref 
+ end + else + -- local/upvalue is being accessed, keep track of it + local id = v.id + local obj = localinfo[id].xref + obj[#obj + 1] = nameref -- add xref + end +end + +--[[-------------------------------------------------------------------- +-- state management functions with open/close pairs +----------------------------------------------------------------------]] + +---------------------------------------------------------------------- +-- enters a code unit, initializes elements +---------------------------------------------------------------------- + +local function enterblock(isbreakable) + local bl = {} -- per-block state + bl.isbreakable = isbreakable + bl.prev = fs.bl + bl.locallist = {} + fs.bl = bl +end + +---------------------------------------------------------------------- +-- leaves a code unit, close any upvalues +---------------------------------------------------------------------- + +local function leaveblock() + local bl = fs.bl + removevars() + fs.bl = bl.prev +end + +---------------------------------------------------------------------- +-- opening of a function +-- * top_fs is only for anchoring the top fs, so that parser() can +-- return it to the caller function along with useful output +-- * used in parser() and body() +---------------------------------------------------------------------- + +local function open_func() + local new_fs -- per-function state + if not fs then -- top_fs is created early + new_fs = top_fs + else + new_fs = {} + end + new_fs.prev = fs -- linked list of function states + new_fs.bl = nil + new_fs.locallist = {} + fs = new_fs +end + +---------------------------------------------------------------------- +-- closing of a function +-- * used in parser() and body() +---------------------------------------------------------------------- + +local function close_func() + removevars() + fs = fs.prev +end + +--[[-------------------------------------------------------------------- +-- other parsing functions +-- * for 
table constructor, parameter list, argument list
----------------------------------------------------------------------]]

-- NOTE(review): several comparisons below test the token against "";
-- presumably a token-class marker was lost from these literals --
-- confirm against the upstream file before relying on them.

----------------------------------------------------------------------
-- consume a '.NAME' or ':NAME' suffix
-- * grammar: field -> ['.' | ':'] NAME
-- * flags the descriptor v as an indexed expression
-- * used in primaryexp(), funcname()
----------------------------------------------------------------------

local function field(v)
  nextt()                               -- step over the '.' or ':'
  checkname({})                         -- key descriptor is thrown away
  v.k = "VINDEXED"
end

----------------------------------------------------------------------
-- consume a bracketed index
-- * grammar: index -> '[' expr ']'
-- * used in recfield(), primaryexp()
----------------------------------------------------------------------

local function yindex(v)
  nextt()                               -- step over the '['
  expr(v)
  checknext("]")
end

----------------------------------------------------------------------
-- consume one record (hash) entry of a table constructor
-- * grammar: recfield -> (NAME | '['exp1']') = exp1
-- * used in constructor()
----------------------------------------------------------------------

local function recfield(cc)
  local key_desc = {}
  if tok ~= "" then                     -- otherwise taken to be '['
    yindex(key_desc)
  else
    checkname(key_desc)
  end
  checknext("=")
  expr({})                              -- value descriptor is thrown away
end

----------------------------------------------------------------------
-- reset the pending list item of a constructor state to "VVOID"
-- * kept in this skeleton only because it writes cc.v.k
-- * used in constructor()
----------------------------------------------------------------------

local function closelistfield(cc)
  local item = cc.v
  if item.k ~= "VVOID" then             -- nothing to do without an item
    item.k = "VVOID"
  end
end

----------------------------------------------------------------------
-- consume one list (array) entry of a table constructor
-- * used in constructor()
----------------------------------------------------------------------

local function listfield(cc)
  expr(cc.v)
end

----------------------------------------------------------------------
-- consume a whole table constructor
-- * grammar:
--     constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
--     field       -> recfield | listfield
--     fieldsep    -> ',' | ';'
-- * sets t.k to "VRELOCABLE"
-- * used in funcargs(), simpleexp()
----------------------------------------------------------------------

local function constructor(t)
  local open_line = ln                  -- reported if '}' is missing
  local cc = { v = { k = "VVOID" }, t = t }
  t.k = "VRELOCABLE"
  checknext("{")
  while tok ~= "}" do
    local head = tok
    -- a record entry is either `"" followed by '='` or starts with '[';
    -- the lookahead is only performed for the "" case
    local is_record = (head == "" and lookahead() == "=") or head == "["
    if is_record then
      recfield(cc)
    else
      listfield(cc)
    end
    -- stop as soon as no field separator follows
    if not (testnext(",") or testnext(";")) then break end
  end
  check_match("}", "{", open_line)
end

----------------------------------------------------------------------
-- consume the parameter list of a function declaration
-- * grammar: parlist -> [ param { ',' param } ]
-- * each named parameter becomes a new local; '...' sets fs.is_vararg
-- * used in body()
----------------------------------------------------------------------

local function parlist()
  local count = 0
  if tok ~= ")" then                    -- list is not empty
    repeat
      local kind = tok
      if kind == "..." then
        nextt()
        fs.is_vararg = true
      elseif kind == "" then            -- param -> NAME
        new_localvar(str_checkname())
        count = count + 1
      else
        syntaxerror(" or '...' expected")
      end
    until fs.is_vararg or not testnext(",")
  end
  adjustlocalvars(count)
end

----------------------------------------------------------------------
-- consume the arguments of a function call
-- * accepts '(' [explist1] ')', a table constructor, or the ""
--   token form (original comment: STRING)
-- * on success marks f as a call ("VCALL")
-- * used in primaryexp()
----------------------------------------------------------------------

local function funcargs(f)
  local args = {}
  local call_line = ln
  local opener = tok
  if opener == "{" then                 -- funcargs -> constructor
    constructor(args)
  elseif opener == "" then              -- funcargs -> STRING
    codestring(args, seminfo)
    nextt()                             -- 'seminfo' is read before advancing
  elseif opener == "(" then             -- funcargs -> '(' [ explist1 ] ')'
    if call_line ~= lastln then
      syntaxerror("ambiguous syntax (function call x new statement)")
    end
    nextt()
    if tok ~= ")" then
      explist1(args)
    else
      args.k = "VVOID"                  -- empty argument list
    end
    check_match(")", "(", call_line)
  else
    syntaxerror("function arguments expected")
    return
  end
  f.k = "VCALL"
end

--[[--------------------------------------------------------------------
-- mostly expression functions
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- consume a parenthesised expression or a single variable
-- * grammar: prefixexp -> NAME | '(' expr ')'
-- * used in primaryexp()
----------------------------------------------------------------------

local function prefixexp(v)
  local head = tok
  if head == "" then
    singlevar(v)
  elseif head == "(" then
    local open_line = ln
    nextt()
    expr(v)
    check_match(")", "(", open_line)
  else
    syntaxerror("unexpected symbol")
  end
end

----------------------------------------------------------------------
-- parses a prefixexp (an expression in parentheses or a single
-- variable) or a function call specification
-- * used in simpleexp(), assignment(), expr_stat()
----------------------------------------------------------------------

local function primaryexp(v)
  -- primaryexp ->
  --    prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
  prefixexp(v)
  while true do
    local suffix = tok
    if suffix == "." then               -- field access
      field(v)
    elseif suffix == "[" then           -- '[' exp1 ']'
      yindex({})
    elseif suffix == ":" then           -- ':' NAME funcargs
      nextt()
      checkname({})
      funcargs(v)
    elseif suffix == "(" or suffix == "" or suffix == "{" then
      funcargs(v)                       -- plain call
    else
      return                            -- no further suffix
    end
  end
end

----------------------------------------------------------------------
-- consume a simple expression
-- * grammar: simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
--                         constructor | FUNCTION body | primaryexp
-- * constant forms set v.k and then advance one token; constructor,
--   function body and primaryexp consume their own tokens
-- * NOTE(review): the first two branches both compare against "";
--   presumably distinct token-class markers were lost here -- confirm
-- * used in subexpr()
----------------------------------------------------------------------

local function simpleexp(v)
  local head = tok
  if head == "" then
    v.k = "VKNUM"
  elseif head == "" then
    codestring(v, seminfo)
  elseif head == "{" then               -- table constructor
    constructor(v)
    return
  elseif head == "function" then
    nextt()
    body(v, false, ln)
    return
  elseif head == "..." then             -- vararg
    check_condition(fs.is_vararg == true,
                    "cannot use '...' outside a vararg function");
    v.k = "VVARARG"
  elseif head == "nil" then
    v.k = "VNIL"
  elseif head == "true" then
    v.k = "VTRUE"
  elseif head == "false" then
    v.k = "VFALSE"
  else
    primaryexp(v)
    return
  end
  nextt()                               -- step over the constant token
end

------------------------------------------------------------------------
-- Parse subexpressions. Includes handling of unary operators and binary
-- operators. A subexpr is given the rhs priority level of the operator
-- immediately left of it, if any (limit is -1 if none,) and if a binop
-- is found, limit is compared with the lhs priority level of the binop
-- in order to determine which executes first.
-- * recursively called
-- * used in expr()
------------------------------------------------------------------------

local function subexpr(v, limit)
  -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
  --   * only binary operators whose left priority exceeds 'limit'
  --     are consumed at this level
  if unopr[tok] then                    -- prefix (unary) operator
    nextt()
    subexpr(v, UNARY_PRIORITY)
  else
    simpleexp(v)
  end
  -- keep folding operators while they bind tighter than 'limit'
  local op = tok
  local left_prio = binopr_left[op]
  while left_prio and left_prio > limit do
    nextt()
    -- the right operand is read with the operator's right priority;
    -- the call hands back the first operator it did not consume
    op = subexpr({}, binopr_right[op])
    left_prio = binopr_left[op]
  end
  return op                             -- first operator left untreated
end

----------------------------------------------------------------------
-- Expression parsing starts here. Function subexpr is entered with the
-- left operator (which is non-existent) priority of -1, which is lower
-- than all actual operators. Expr information is returned in parm v.
-- * used in cond(), explist1(), index(), recfield(), listfield(),
--   prefixexp(), while_stat(), exp1()
----------------------------------------------------------------------

-- this is a forward-referenced local
function expr(v)
  -- expr -> subexpr
  subexpr(v, 0)                         -- 0 is the limit passed for "no operator on the left"
end

--[[--------------------------------------------------------------------
-- third level parsing functions
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- parse a variable assignment sequence
-- * v is a descriptor whose v.v holds the target just parsed; its kind
--   must be VLOCAL, VUPVAL, VGLOBAL or VINDEXED
-- * recursively called, once per extra ',' target
-- * used in expr_stat()
------------------------------------------------------------------------

local function assignment(v)
  local e = {}
  local c = v.v.k
  check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
                  or c == "VINDEXED", "syntax error")
  if testnext(",") then  -- assignment -> ',' primaryexp assignment
    local nv = {}  -- expdesc
    nv.v = {}
    primaryexp(nv.v)
    -- lparser.c deals with some register usage conflict here
    assignment(nv)
  else  -- assignment -> '=' explist1
    checknext("=")
    explist1(e)
    return  -- avoid default
  end
  e.k = "VNONRELOC"
end

----------------------------------------------------------------------
-- parse a for loop body for both versions of the for loop
-- * nvars: number of declared loop variables to activate
-- * isnum: accepted for both loop kinds but not read here
-- * used in fornum(), forlist()
----------------------------------------------------------------------

local function forbody(nvars, isnum)
  -- forbody -> DO block
  checknext("do")
  enterblock(false)  -- scope for declared variables
  adjustlocalvars(nvars)
  block()
  leaveblock()  -- end of scope for declared variables
end

----------------------------------------------------------------------
-- parse a numerical for loop, calls forbody()
-- * varname: the loop variable name already consumed by for_stat()
-- * declares three control variables before the loop variable
-- * used in for_stat()
----------------------------------------------------------------------

local function fornum(varname)
  -- fornum -> NAME = exp1, exp1 [, exp1] DO body
  new_localvarliteral("(for index)")
  new_localvarliteral("(for limit)")
  new_localvarliteral("(for step)")
  new_localvar(varname)
  checknext("=")
  exp1()  -- initial value
  checknext(",")
  exp1()  -- limit
  if testnext(",") then
    exp1()  -- optional step
  end
  -- (when no step is given nothing is emitted; default step = 1)
  forbody(1, true)
end

----------------------------------------------------------------------
-- parse a generic for loop, calls forbody()
-- * indexname: the first loop variable name already consumed by
--   for_stat()
-- * used in for_stat()
----------------------------------------------------------------------

local function forlist(indexname)
  -- forlist -> NAME {, NAME} IN explist1 DO body
  local e = {}
  -- create control variables
  new_localvarliteral("(for generator)")
  new_localvarliteral("(for state)")
  new_localvarliteral("(for control)")
  -- create declared variables
  new_localvar(indexname)
  local nvars = 1
  while testnext(",") do
    new_localvar(str_checkname())
    nvars = nvars + 1
  end
  checknext("in")
  explist1(e)
  forbody(nvars, false)
end

----------------------------------------------------------------------
-- parse a function name specification
-- * returns true when the name ends in a ':' method part
-- * used in func_stat()
----------------------------------------------------------------------

local function funcname(v)
  -- funcname -> NAME {field} [':' NAME]
  local needself = false
  singlevar(v)
  while tok == "." do
    field(v)
  end
  if tok == ":" then
    needself = true
    field(v)
  end
  return needself
end

----------------------------------------------------------------------
-- parse the single expressions needed in numerical for loops
-- * the expression descriptor is discarded
-- * used in fornum()
----------------------------------------------------------------------

-- this is a forward-referenced local
function exp1()
  -- exp1 -> expr
  local e = {}
  expr(e)
end

----------------------------------------------------------------------
-- parse condition in a repeat statement or an if control structure
-- * used in repeat_stat(), test_then_block()
----------------------------------------------------------------------

local function cond()
  -- cond -> expr
  local v = {}
  expr(v)  -- read condition
end

----------------------------------------------------------------------
-- parse part of an if control structure, including the condition
-- * used in if_stat()
----------------------------------------------------------------------

local function test_then_block()
  -- test_then_block -> [IF | ELSEIF] cond THEN block
  nextt()  -- skip IF or ELSEIF
  cond()
  checknext("then")
  block()  -- 'then' part
end

----------------------------------------------------------------------
-- parse a local function statement
-- * the function's name is declared and activated before its body is
--   parsed
-- * used in local_stat()
----------------------------------------------------------------------

local function localfunc()
  -- localfunc -> NAME body
  -- both descriptors are real tables, matching function_stat(); the
  -- previous `local v, b = {}` left b as nil when handed to body()
  local v, b = {}, {}
  new_localvar(str_checkname())
  v.k = "VLOCAL"
  adjustlocalvars(1)
  body(b, false, ln)
end

----------------------------------------------------------------------
-- parse a local variable declaration statement
-- * the new names are activated only after the initialisers are parsed
-- * used in local_stat()
----------------------------------------------------------------------

local function localstat()
  -- localstat -> NAME {',' NAME} ['=' explist1]
  local nvars = 0
  local e = {}
  repeat
    new_localvar(str_checkname())
    nvars = nvars + 1
  until not testnext(",")
  if testnext("=") then
    explist1(e)
  else
    e.k = "VVOID"
  end
  adjustlocalvars(nvars)
end

----------------------------------------------------------------------
-- parse a list of comma-separated expressions
-- * every expression is parsed into the same descriptor e
-- * used in return_stat(), localstat(), funcargs(), assignment(),
--   forlist()
----------------------------------------------------------------------

-- this is a forward-referenced local
function explist1(e)
  -- explist1 -> expr { ',' expr }
  expr(e)
  while testnext(",") do
    expr(e)
  end
end

----------------------------------------------------------------------
-- parse function declaration body
-- * e: expression descriptor (not read here)
-- * needself: when true, an implicit "self" local is declared first
-- * line: line of the 'function' keyword, used if 'end' is missing
-- * used in simpleexp(), localfunc(), func_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function body(e, needself, line)
  -- body -> '(' parlist ')' chunk END
  open_func()
  checknext("(")
  if needself then
    new_localvarliteral("self", true)
    adjustlocalvars(1)
  end
  parlist()
  checknext(")")
  chunk()
  check_match("end", "function", line)
  close_func()
end

----------------------------------------------------------------------
-- parse a code block or unit
-- * wraps chunk() in its own non-breakable block scope
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
--   if_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function block()
  -- block -> chunk
  enterblock(false)
  chunk()
  leaveblock()
end

--[[--------------------------------------------------------------------
-- second level parsing functions, all with '_stat' suffix
-- * since they are called via a table lookup, they cannot be local
--   functions (a lookup table of local functions might be smaller...)
-- * stat() -> *_stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- entry point for both kinds of for loop
-- * grammar: stat -> for_stat -> FOR (fornum | forlist) END
-- * the token after the first name selects fornum() or forlist()
-- * used in stat()
----------------------------------------------------------------------

local function for_stat()
  local start_line = line               -- statement line, for errors
  enterblock(true)                      -- loop + control variable scope
  nextt()                               -- step over 'for'
  local first_name = str_checkname()
  local selector = tok
  if selector == "," or selector == "in" then
    forlist(first_name)
  elseif selector == "=" then
    fornum(first_name)
  else
    syntaxerror("'=' or 'in' expected")
  end
  check_match("end", "for", start_line)
  leaveblock()                          -- loop scope (`break' jumps here)
end

----------------------------------------------------------------------
-- while loop; the body is handled by block()
-- * grammar: stat -> while_stat -> WHILE cond DO block END
-- * used in stat()
----------------------------------------------------------------------

local function while_stat()
  local start_line = line
  nextt()                               -- step over 'while'
  cond()
  enterblock(true)                      -- breakable loop block
  checknext("do")
  block()
  check_match("end", "while", start_line)
  leaveblock()
end

----------------------------------------------------------------------
-- repeat-until loop; the body is handled by chunk()
-- * grammar: stat -> repeat_stat -> REPEAT block UNTIL cond
-- * two nested blocks are opened: a breakable loop block and an inner
--   scope block that is still open while the condition is parsed
-- * the original comment notes that lparser.c additionally closes
--   upvalues here; nothing is lexed for that step
-- * used in stat()
----------------------------------------------------------------------

local function repeat_stat()
  local start_line = line
  enterblock(true)                      -- loop block
  enterblock(false)                     -- scope block
  nextt()                               -- step over 'repeat'
  chunk()
  check_match("until", "repeat", start_line)
  cond()
  leaveblock()                          -- scope block done
  leaveblock()                          -- loop block done
end

----------------------------------------------------------------------
-- if / elseif / else chain
-- * grammar: stat -> if_stat -> IF cond THEN block
--                               {ELSEIF cond THEN block} [ELSE block] END
-- * used in stat()
----------------------------------------------------------------------

local function if_stat()
  local start_line = line
  repeat
    test_then_block()                   -- IF/ELSEIF cond THEN block
  until tok ~= "elseif"
  if tok == "else" then
    nextt()                             -- step over 'else'
    block()
  end
  check_match("end", "if", start_line)
end

----------------------------------------------------------------------
-- return statement
-- * grammar: stat -> return_stat -> RETURN explist
-- * the value list is skipped when a block-ending token or ';' follows
-- * used in stat()
----------------------------------------------------------------------

local function return_stat()
  local e = {}
  nextt()                               -- step over 'return'
  local following = tok
  if not (block_follow[following] or following == ";") then
    explist1(e)                         -- optional return values
  end
end

----------------------------------------------------------------------
-- break statement
-- * grammar: stat -> break_stat -> BREAK
-- * walks outward through the block chain looking for a breakable
--   block; reports a syntax error if none encloses the statement
-- * used in stat()
----------------------------------------------------------------------

local function break_stat()
  local enclosing = fs.bl
  nextt()                               -- step over 'break'
  while enclosing and not enclosing.isbreakable do
    enclosing = enclosing.prev
  end
  if enclosing == nil then
    syntaxerror("no loop to break")
  end
end

----------------------------------------------------------------------
-- parse a function call with no returns or an assignment statement
-- * the struct with .prev is used for name searching in lparse.c,
--   so it is retained for now; present in assignment() also
-- * used in stat()
----------------------------------------------------------------------

local function expr_stat()
  -- stat -> expr_stat -> func | assignment
  local stat_id = tpos - 1              -- slot in statinfo for this statement
  local target = { v = {} }
  primaryexp(target.v)
  if target.v.k ~= "VCALL" then         -- stat -> assignment
    target.prev = nil
    assignment(target)
    statinfo[stat_id] = "assign"
  else                                  -- stat -> func (results unused)
    statinfo[stat_id] = "call"
  end
end

----------------------------------------------------------------------
-- function statement
-- * grammar: stat -> function_stat -> FUNCTION funcname body
-- * used in stat()
----------------------------------------------------------------------

local function function_stat()
  local start_line = line
  local name_desc, body_desc = {}, {}
  nextt()                               -- step over 'function'
  body(body_desc, funcname(name_desc), start_line)
end

----------------------------------------------------------------------
-- plain DO..END block
-- * grammar: stat -> do_stat -> DO block END
-- * used in stat()
----------------------------------------------------------------------

local function do_stat()
  local start_line = line
  nextt()                               -- step over 'do'
  block()
  check_match("end", "do", start_line)
end

----------------------------------------------------------------------
-- statement starting with LOCAL
-- * grammar: stat -> local_stat -> LOCAL FUNCTION localfunc
--                               -> LOCAL localstat
-- * used in stat()
----------------------------------------------------------------------

local function local_stat()
  nextt()                               -- step over 'local'
  local parse = testnext("function") and localfunc or localstat
  parse()
end

--[[--------------------------------------------------------------------
-- main functions, top level parsing functions
-- * accessible functions are: init(lexer), parser()
-- * [entry] -> parser() -> chunk() -> stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- statement dispatcher
-- * keyword statements go through the stat_call lookup table; anything
--   else is handed to expr_stat()
-- * returns true after 'return' or 'break' (which must end a chunk),
--   false otherwise
-- * used in chunk()
----------------------------------------------------------------------

local stat_call = {                     -- keyword -> handler
  ["break"]    = break_stat,
  ["do"]       = do_stat,
  ["for"]      = for_stat,
  ["function"] = function_stat,
  ["if"]       = if_stat,
  ["local"]    = local_stat,
  ["repeat"]   = repeat_stat,
  ["return"]   = return_stat,
  ["while"]    = while_stat,
}

local function stat()
  line = ln                             -- remembered for error messages
  local keyword = tok
  local handler = stat_call[keyword]
  if not handler then
    expr_stat()
    return false
  end
  statinfo[tpos - 1] = keyword
  handler()
  -- 'return' and 'break' have to be the last statement of a chunk
  return keyword == "return" or keyword == "break"
end

----------------------------------------------------------------------
-- parse a sequence of statements
-- * grammar: chunk -> { stat [';'] }
-- * stops at a block-ending token or after a statement for which
--   stat() returned true
-- * used in parser(), body(), block(), repeat_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function chunk()
  local finished = false
  while not (finished or block_follow[tok]) do
    finished = stat()
    testnext(";")
  end
end

----------------------------------------------------------------------
-- performs parsing, returns parsed data structure
----------------------------------------------------------------------

function parser()
  open_func()
  fs.is_vararg = true                   -- main func. is always vararg
  nextt()                               -- fetch the first token
  chunk()
  check("")
  close_func()
  -- hand every tracking table back to the caller
  local result = {}
  result.globalinfo = globalinfo
  result.localinfo = localinfo
  result.statinfo = statinfo
  result.toklist = toklist
  result.seminfolist = seminfolist
  result.toklnlist = toklnlist
  result.xreflist = xreflist
  return result
end

----------------------------------------------------------------------
-- prepare the parser for a new token stream
-- * tokorig, seminfoorig, toklnorig: parallel lexer lists (token
--   class, semantic info, line number)
-- * builds grammar-only lists (toklist, seminfolist, toklnlist) plus
--   xreflist, which maps each kept token back to its original index
-- * number and string constants get placeholder semantic info
-- * NOTE(review): name, number, string and end-of-stream tokens are
--   all mapped to "" below; presumably distinct markers were lost from
--   these literals -- confirm against the upstream file
----------------------------------------------------------------------

function init(tokorig, seminfoorig, toklnorig)
  tpos = 1                              -- token position
  top_fs = {}                           -- fresh top level function state
  toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
  local out = 1                         -- next slot in the grammar lists
  for src = 1, #tokorig do
    local class = tokorig[src]
    local keep = true
    if class == "TK_NAME" then
      class = ""
      seminfolist[out] = seminfoorig[src]
    elseif class == "TK_NUMBER" then
      class = ""
      seminfolist[out] = 0              -- placeholder value
    elseif class == "TK_STRING" or class == "TK_LSTRING" then
      class = ""
      seminfolist[out] = ""             -- placeholder value
    elseif class == "TK_EOS" then
      class = ""
    elseif class == "TK_KEYWORD" or class == "TK_OP" then
      class = seminfoorig[src]          -- the keyword/operator text itself
    else
      keep = false                      -- non-grammar token, dropped
    end
    if keep then
      toklist[out] = class
      toklnlist[out] = toklnorig[src]
      xreflist[out] = src
      out = out + 1
    end
  end
  ------------------------------------------------------------------
  -- reset the variable tracking tables
  ------------------------------------------------------------------
  globalinfo, globallookup, localinfo = {}, {}, {}
  ilocalinfo, ilocalrefs = {}, {}
  statinfo = {}                         -- experimental
end
--end of inserted module
end

-- preload function for module optlex
preload.optlex =
function()
--start of inserted module
module "optlex"

local string = base.require "string"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function, can override by setting own function into module
error = base.error

warn = {}                       -- table for warning flags

local stoks, sinfos, stoklns    -- source lists

local is_realtoken = {          -- significant (grammar) tokens
  TK_EOS = true,
  TK_KEYWORD = true,
  TK_LSTRING = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_OP = true,
  TK_STRING = true,
}
local is_faketoken = {          -- whitespace (non-grammar) tokens
  TK_COMMENT = true,
  TK_EOL = true,
  TK_LCOMMENT = true,
  TK_SPACE = true,
}

local opt_details               -- for extra information

------------------------------------------------------------------------
-- true if token i sits at the start of a line
-- * walks backwards over entries whose token is ""
------------------------------------------------------------------------

local function atlinestart(i)
  while true do
    local prev = stoks[i - 1]
    if i <= 1 or prev == "TK_EOL" then
      return true
    end
    if prev ~= "" then
      return false
    end
    i = i - 1                           -- "" entry: look one further back
  end
end

------------------------------------------------------------------------
-- true if token i sits at the end of a line
-- * walks forwards over entries whose token is ""
------------------------------------------------------------------------

local function atlineend(i)
  while true do
    local nxt = stoks[i + 1]
    if i >= #stoks or nxt == "TK_EOL" or nxt == "TK_EOS" then
      return true
    end
    if nxt ~= "" then
      return false
    end
    i = i + 1                           -- "" entry: look one further on
  end
end

------------------------------------------------------------------------
-- counts the line breaks inside a long comment
-- * the count lets the caller reinsert EOLs to keep line numbering
-- * a two-character break made of different characters (CRLF or LFCR)
--   counts once
------------------------------------------------------------------------

local function commenteols(lcomment)
  local open_len = #match(lcomment, "^%-%-%[=*%[")
  local inner = sub(lcomment, open_len + 1, -(open_len - 1))  -- strip delims
  local pos, count = 1, 0
  local first_at, _, first, second = find(inner, "([\r\n])([\r\n]?)", pos)
  while first_at do
    count = count + 1
    pos = first_at + 1
    if #second > 0 and first ~= second then   -- CRLF or LFCR pair
      pos = pos + 1
    end
    first_at, _, first, second = find(inner, "([\r\n])([\r\n]?)", pos)
  end
  return count
end

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * see documentation for a reference table of interactions
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
-- * NOTE: this doesn't work at the start or the end or for EOS!
------------------------------------------------------------------------

local function checkpair(i, j)
  local match = match
  local t1, t2 = stoks[i], stoks[j]
  --------------------------------------------------------------------
  -- a string on either side never needs a separator
  if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
     t2 == "TK_STRING" or t2 == "TK_LSTRING" then
    return ""
  --------------------------------------------------------------------
  elseif t1 == "TK_OP" or t2 == "TK_OP" then
    -- operator next to a keyword or name: no separator
    if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
       (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
      return ""
    end
    if t1 == "TK_OP" and t2 == "TK_OP" then
      -- for TK_OP/TK_OP pairs, see notes in technotes.txt
      local op, op2 = sinfos[i], sinfos[j]
      if (match(op, "^%.%.?$") and match(op2, "^%.")) or
         (match(op, "^[~=<>]$") and op2 == "=") or
         (op == "[" and (op2 == "[" or op2 == "=")) then
        return " "
      end
      return ""
    end
    -- "TK_OP" + "TK_NUMBER" case: only a run of dots needs a separator
    local op = sinfos[i]
    if t2 == "TK_OP" then op = sinfos[j] end
    if match(op, "^%.%.?%.?$") then
      return " "
    end
    return ""
  --------------------------------------------------------------------
  else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
    return " "
  --------------------------------------------------------------------
  end
end

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
-- * entries whose token is "" are dropped; the three source lists are
--   replaced by the compacted copies
------------------------------------------------------------------------

local function repack_tokens()
  local dtoks, dinfos, dtoklns = {}, {}, {}
  local j = 1
  for i = 1, #stoks do
    local tok = stoks[i]
    if tok ~= "" then
      dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
      j = j + 1
    end
  end
  stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
--   (possibly an idiotic approach...)
-- * scientific notation being generated is not in canonical form, this
--   may or may not be a bad thing
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
--   (1) no need to remove leading zeros, just skip to (2)
--   (2) convert to integer if size equal or smaller
--       * change if equal size -> lose the 'x' to reduce entropy
--       * the conversion is only accepted when the decimal text is
--         non-negative and reads back as the same value
--   (3) number is then processed as an integer
--   (4) note: does not make 0[xX] consistent
-- * integer:
--   (1) note: includes anything with trailing ".", ".0", ...
--   (2) remove useless fractional part, if present, e.g. 123.000
--   (3) remove leading zeros, e.g. 000123
--   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
--   (1) split into digits dot digits
--   (2) if no integer portion, take as zero (can omit later)
--   (3) handle degenerate .000 case, after which the fractional part
--       must be non-zero (if zero, it's matched as an integer)
--   (4) remove trailing zeros for fractional portion
--   (5) p.q where p > 0 and q > 0 cannot be shortened any more
--   (6) otherwise p == 0 and the form is .q, e.g. .000123
--   (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if significand has ".", shift it out so it becomes an integer
--   (3) if significand is zero, just use zero
--   (4) remove leading zeros for significand
--   (5) shift out trailing zeros for significand
--   (6) examine exponent and determine which format is best:
--       integer, with fraction, scientific
-- * i: index into the source lists; sinfos[i] is rewritten in place
--   when a different representation is chosen
------------------------------------------------------------------------

local function do_number(i)
  local before = sinfos[i]      -- 'before'
  local z = before              -- working representation
  local y                       -- 'after', if better
  --------------------------------------------------------------------
  if match(z, "^0[xX]") then            -- hexadecimal number
    local n = base.tonumber(z)
    local v = base.tostring(n)
    -- Length alone is not a safe criterion: tostring() may round a
    -- large value, or yield a negative number where the conversion
    -- wraps around. Such text must not replace the literal, and a
    -- leading '-' would not match any of the patterns below.
    if n and #v <= #z and not match(v, "^%-")
       and base.tonumber(v) == n then
      z = v  -- change to integer, AND continue
    else
      return  -- no change; stick to hex
    end
  end
  --------------------------------------------------------------------
  if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
    z = match(z, "^(%d+)%.?0*$")  -- int portion only
    if z + 0 > 0 then
      z = match(z, "^0*([1-9]%d*)$")  -- remove leading zeros
      local v = #match(z, "0*$")      -- number of trailing zeros
      local nv = base.tostring(v)
      if v > #nv + 1 then  -- scientific is shorter
        z = sub(z, 1, #z - v).."e"..nv
      end
      y = z
    else
      y = "0"  -- basic zero
    end
  --------------------------------------------------------------------
  elseif not match(z, "[eE]") then      -- number with fraction part
    local p, q = match(z, "^(%d*)%.(%d+)$")  -- split
    if p == "" then p = 0 end  -- int part zero
    if q + 0 == 0 and p == 0 then
      y = "0"  -- degenerate .000 case
    else
      -- now, q > 0 holds and p is a number
      local v = #match(q, "0*$")  -- remove trailing zeros
      if v > 0 then
        q = sub(q, 1, #q - v)
      end
      -- if p > 0, nothing else we can do to simplify p.q case
      if p + 0 > 0 then
        y = p.."."..q
      else
        y = "."..q  -- tentative, e.g. .000123
        local v = #match(q, "^0*")  -- # leading zeros
        local w = #q - v            -- # significant digits
        local nv = base.tostring(#q)
        -- e.g. compare 123e-6 versus .000123
        if w + 2 + #nv < 1 + #q then
          y = sub(q, -w).."e-"..nv
        end
      end
    end
  --------------------------------------------------------------------
  else                                  -- scientific number
    local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
    ex = base.tonumber(ex)
    -- if got ".", shift out fractional portion of significand
    local p, q = match(sig, "^(%d*)%.(%d*)$")
    if p then
      ex = ex - #q
      sig = p..q
    end
    if sig + 0 == 0 then
      y = "0"  -- basic zero
    else
      local v = #match(sig, "^0*")  -- remove leading zeros
      sig = sub(sig, v + 1)
      v = #match(sig, "0*$") -- shift out trailing zeros
      if v > 0 then
        sig = sub(sig, 1, #sig - v)
        ex = ex + v
      end
      -- examine exponent and determine which format is best
      local nex = base.tostring(ex)
      if ex == 0 then  -- it's just an integer
        y = sig
      elseif ex > 0 and (ex <= 1 + #nex) then  -- a number
        y = sig..rep("0", ex)
      elseif ex < 0 and (ex >= -#sig) then  -- fraction, e.g. .123
        v = #sig + ex
        y = sub(sig, 1, v).."."..sub(sig, v + 1)
      elseif ex < 0 and (#nex >= -ex - #sig) then
        -- e.g. compare 1234e-5 versus .01234
        -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
        --     -> #nex >= -ex - #sig
        v = -ex - #sig
        y = "."..rep("0", v)..sig
      else  -- non-canonical scientific representation
        y = sig.."e"..ex
      end
    end--if sig
  end
  --------------------------------------------------------------------
  -- store the result only when it differs; report it when details
  -- are being collected
  if y and y ~= sinfos[i] then
    if opt_details then
      print(" (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
      opt_details = opt_details + 1
    end
    sinfos[i] = y
  end
end

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\ -- no change
--   \"\' -- depends on delim, other can remove \
--   \[\] -- remove \
--   \<other> -- general escape, remove \
--   \<eol> -- normalize the EOL only
--   \ddd -- if \a\b\f\n\r\t\v, change to latter
--     if other < ascii 32, keep ddd but zap leading zeros
--     but cannot have following digits
--     if >= ascii 32, translate it into the literal, then also
--     do escapes for \\,\",\' cases
--   <other> -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------

-- Optimizes the quoted string token at index I of sinfos in place.
-- * walks the string body once, simplifying escapes as listed above
--   and counting escaped delimiters versus raw opposite quotes
-- * afterwards swaps the quote character when that makes it shorter
local function do_string(I)
  local info = sinfos[I]
  local delim = sub(info, 1, 1)                 -- delimiter used
  local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  local z = sub(info, 2, -2)                    -- actual string
  local i = 1
  local c_delim, c_ndelim = 0, 0                -- "/' counts
  --------------------------------------------------------------------
  while i <= #z do
    local c = sub(z, i, i)
    ----------------------------------------------------------------
    if c == "\\" then                           -- escaped stuff
      local j = i + 1
      local d = sub(z, j, j)
      -- position in this list selects the escape class below
      local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
      ------------------------------------------------------------
      if not p then                             -- \<other> -- remove \
        z = sub(z, 1, i - 1)..sub(z, j)
        i = i + 1
      ------------------------------------------------------------
      elseif p <= 8 then                        -- \a\b\f\n\r\t\v\\
        i = i + 2                               -- no change
      ------------------------------------------------------------
      elseif p <= 10 then                       -- \<eol> -- normalize EOL
        local eol = sub(z, j, j + 1)
        if eol == "\r\n" or eol == "\n\r" then
          z = sub(z, 1, i).."\n"..sub(z, j + 2)
        elseif p == 10 then                     -- \r case
          z = sub(z, 1, i).."\n"..sub(z, j + 1)
        end
        i = i + 2
      ------------------------------------------------------------
      elseif p <= 12 then                       -- \"\' -- remove \ for ndelim
        if d == delim then
          c_delim = c_delim + 1
          i = i + 2
        else
          c_ndelim = c_ndelim + 1
          z = sub(z, 1, i - 1)..sub(z, j)
          i = i + 1
        end
      ------------------------------------------------------------
      else                                      -- \ddd -- various steps
        local s = match(z, "^(%d%d?%d?)", j)
        j = i + 1 + #s                          -- skip to location
        local cv = s + 0
        local cc = string.char(cv)
        local p = find("\a\b\f\n\r\t\v", cc, 1, true)
        if p then                               -- special escapes
          s = "\\"..sub("abfnrtv", p, p)
        elseif cv < 32 then                     -- normalized \ddd
          if match(sub(z, j, j), "%d") then
            -- if a digit follows, \ddd cannot be shortened
            s = "\\"..s
          else
            s = "\\"..cv
          end
        elseif cc == delim then                 -- \<delim>
          s = "\\"..cc
          c_delim = c_delim + 1
        elseif cc == "\\" then                  -- \\
          s = "\\\\"
        else                                    -- literal character
          s = cc
          if cc == ndelim then
            c_ndelim = c_ndelim + 1
          end
        end
        z = sub(z, 1, i - 1)..s..sub(z, j)
        i = i + #s
      ------------------------------------------------------------
      end--if p
    ----------------------------------------------------------------
    else-- c ~= "\\"                            -- <other> -- no change
      i = i + 1
      if c == ndelim then  -- count ndelim, for switching delimiters
        c_ndelim = c_ndelim + 1
      end
    ----------------------------------------------------------------
    end--if c
  end--while
  --------------------------------------------------------------------
  -- switching delimiters, a long-winded derivation:
  -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  if c_delim > c_ndelim then
    i = 1
    while i <= #z do
      local p, q, r = find(z, "([\'\"])", i)
      if not p then break end
      if r == delim then                        -- \<delim> -> <delim>
        z = sub(z, 1, p - 2)..sub(z, p)
        i = p
      else-- r == ndelim                        -- <ndelim> -> \<ndelim>
        z = sub(z, 1, p - 1).."\\"..sub(z, p)
        i = p + 2
      end
    end--while
    delim = ndelim  -- actually change delimiters
  end
  --------------------------------------------------------------------
  z = delim..z..delim
  if z ~= sinfos[I] then
    if opt_details then
      print(" (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
      opt_details = opt_details + 1
    end
    sinfos[I] = z
  end
end

------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------

-- Optimizes the long string token at index I of sinfos in place.
-- * sets warn.LSTRING when a line ends in whitespace (text is kept)
-- * CRLF/LFCR pairs become a single "\n"
-- * uses the shortest "[=*[" level whose closer is absent from the text
local function do_lstring(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep + 1))  -- lstring without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- flag a warning if there are trailing spaces, won't optimize!
      if match(ln, "%s+$") then
        warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
      end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if p then
      if #s > 0 and r ~= s then  -- skip CRLF or LFCR
        i = i + 1
      end
      -- skip first newline, which can be safely deleted
      -- NOTE(review): i was already advanced past p above, so
      -- "i == 1 and i == p" is never true here and the leading
      -- newline is in fact always kept; left unchanged on purpose
      if not(i == 1 and i == p) then
        y = y.."\n"
      end
    end
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test ending delimiter with less of '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then  -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------

-- Optimizes the long comment token at index I of sinfos in place.
-- * trailing whitespace is trimmed from every non-empty line
-- * CRLF/LFCR pairs become a single "\n"
-- * uses the shortest "--[=*[" level whose closer is absent from the text
local function do_lcomment(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -(sep - 2), -1)
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line, extract and check trailing whitespace
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- trim trailing whitespace if non-empty line
      local ws = match(ln, "%s*$")
      -- use the LENGTH of the whitespace run; doing arithmetic on the
      -- whitespace string itself raises an error
      if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if p then
      if #s > 0 and r ~= s then  -- skip CRLF or LFCR
        i = i + 1
      end
      y = y.."\n"
    end
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  sep = sep - 2
  if sep >= 3 then
    local chk, okay = sep - 1
    -- loop to test ending delimiter with less of '=' down to zero
    while chk >= 2 do
      local delim = "%]"..rep("=", chk - 2).."%]"
      if not match(y, delim) then okay = chk end
      chk = chk - 1
    end
    if okay then  -- change delimiters
      sep = rep("=", okay - 2)
      delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------

-- Trims trailing whitespace from the short comment at index i of sinfos.
local function do_comment(i)
  local info = sinfos[i]
  local ws = match(info, "%s*$")        -- just look from end of string
  if #ws > 0 then
    -- cut by the length of the whitespace run (ws itself is a string)
    info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  end
  sinfos[i] = info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
------------------------------------------------------------------------

-- * opt_keep: plain text to look for, or nil/false when not requested
-- * info: the complete long comment token, delimiters included
-- * returns true when opt_keep occurs in the comment body, else false
local function keep_lcomment(opt_keep, info)
  if not opt_keep then return false end  -- option not set
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  return find(z, opt_keep, 1, true) ~= nil  -- plain-text search
end

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
------------------------------------------------------------------------

-- * option: table of flags; reads "opt-comments", "opt-whitespace",
--   "opt-emptylines", "opt-eols", "opt-strings", "opt-numbers",
--   "opt-experimental", KEEP and DETAILS
-- * toklist, semlist, toklnlist: parallel lists of token types, token
--   texts and line numbers
-- * returns the three processed lists (stoks, sinfos, stoklns)
-- * repack_tokens, atlinestart, atlineend, commenteols, checkpair,
--   is_faketoken and is_realtoken are defined outside this chunk
function optimize(option, toklist, semlist, toklnlist)
  --------------------------------------------------------------------
  -- set option flags
  --------------------------------------------------------------------
  local opt_comments = option["opt-comments"]
  local opt_whitespace = option["opt-whitespace"]
  local opt_emptylines = option["opt-emptylines"]
  local opt_eols = option["opt-eols"]
  local opt_strings = option["opt-strings"]
  local opt_numbers = option["opt-numbers"]
  local opt_x = option["opt-experimental"]
  local opt_keep = option.KEEP
  opt_details = option.DETAILS and 0    -- upvalues for details display
  print = print or base.print
  if opt_eols then  -- forced settings, otherwise won't work properly
    opt_comments = true
    opt_whitespace = true
    opt_emptylines = true
  elseif opt_x then
    opt_whitespace = true
  end
  --------------------------------------------------------------------
  -- variable initialization
  --------------------------------------------------------------------
  -- set source lists
  stoks, sinfos, stoklns = toklist, semlist, toklnlist
  local i = 1                           -- token position
  local tok, info                       -- current token
  local prev    -- position of last grammar token
                -- on same line (for TK_SPACE stuff)
  --------------------------------------------------------------------
  -- changes a token, info pair
  -- * with no arguments the token at the current position is blanked
  --------------------------------------------------------------------
  local function settoken(tok, info, I)
    I = I or i
    stoks[I] = tok or ""
    sinfos[I] = info or ""
  end
  --------------------------------------------------------------------
  -- experimental optimization for ';' operator
  --------------------------------------------------------------------
  if opt_x then
    while true do
      tok, info = stoks[i], sinfos[i]
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      elseif tok == "TK_OP" and info == ";" then
        -- ';' operator found, since it is entirely optional, set it
        -- as a space to let whitespace optimization do the rest
        settoken("TK_SPACE", " ")
      end
      i = i + 1
    end
    repack_tokens()
  end
  --------------------------------------------------------------------
  -- processing loop (PASS 1)
  --------------------------------------------------------------------
  i = 1
  while true do
    tok, info = stoks[i], sinfos[i]
    ----------------------------------------------------------------
    local atstart = atlinestart(i)      -- set line begin flag
    if atstart then prev = nil end
    ----------------------------------------------------------------
    if tok == "TK_EOS" then             -- end of stream/pass
      break
    ----------------------------------------------------------------
    elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
           tok == "TK_NAME" or          -- operators
           tok == "TK_OP" then
      -- TK_KEYWORD and TK_OP can't be optimized without a big
      -- optimization framework; it would be more of an optimizing
      -- compiler, not a source code compressor
      -- TK_NAME that are locals needs parser to analyze/optimize
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_NUMBER" then      -- numbers
      if opt_numbers then
        do_number(i)  -- optimize
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_STRING" or        -- strings, long strings
           tok == "TK_LSTRING" then
      if opt_strings then
        if tok == "TK_STRING" then
          do_string(i)  -- optimize
        else
          do_lstring(i)  -- optimize
        end
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_COMMENT" then     -- short comments
      if opt_comments then
        if i == 1 and sub(info, 1, 1) == "#" then
          -- keep shbang comment, trim whitespace
          do_comment(i)
        else
          -- safe to delete, as a TK_EOL (or TK_EOS) always follows
          settoken()  -- remove entirely
        end
      elseif opt_whitespace then        -- trim whitespace only
        do_comment(i)
      end
    ----------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then    -- long comments
      if keep_lcomment(opt_keep, info) then
        ------------------------------------------------------------
        -- if --keep, we keep a long comment if the --keep text is
        -- found; this is a feature to keep copyright or license texts
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      elseif opt_comments then
        local eols = commenteols(info)
        ------------------------------------------------------------
        -- prepare opt_emptylines case first, if a disposable token
        -- follows, current one is safe to dump, else keep a space;
        -- it is implied that the operation is safe for '-', because
        -- current is a TK_LCOMMENT, and must be separate from a '-'
        if is_faketoken[stoks[i + 1]] then
          settoken()  -- remove entirely
          tok = ""
        else
          settoken("TK_SPACE", " ")
        end
        ------------------------------------------------------------
        -- if there are embedded EOLs to keep and opt_emptylines is
        -- disabled, then switch the token into one or more EOLs
        if not opt_emptylines and eols > 0 then
          settoken("TK_EOL", rep("\n", eols))
        end
        ------------------------------------------------------------
        -- if optimizing whitespaces, force reinterpretation of the
        -- token to give a chance for the space to be optimized away
        if opt_whitespace and tok ~= "" then
          i = i - 1  -- to reinterpret
        end
        ------------------------------------------------------------
      else  -- disabled case
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      end
    ----------------------------------------------------------------
    elseif tok == "TK_EOL" then         -- line endings
      if atstart and opt_emptylines then
        settoken()  -- remove entirely
      elseif info == "\r\n" or info == "\n\r" then
        -- normalize the rest of the EOLs for CRLF/LFCR only
        -- (note that TK_LCOMMENT can change into several EOLs)
        settoken("TK_EOL", "\n")
      end
    ----------------------------------------------------------------
    elseif tok == "TK_SPACE" then       -- whitespace
      if opt_whitespace then
        if atstart or atlineend(i) then
          -- delete leading and trailing whitespace
          settoken()  -- remove entirely
        else
          ------------------------------------------------------------
          -- at this point, since leading whitespace have been removed,
          -- there should be a either a real token or a TK_LCOMMENT
          -- prior to hitting this whitespace; the TK_LCOMMENT case
          -- only happens if opt_comments is disabled; so prev ~= nil
          local ptok = stoks[prev]
          if ptok == "TK_LCOMMENT" then
            -- previous TK_LCOMMENT can abut with anything
            settoken()  -- remove entirely
          else
            -- prev must be a grammar token; consecutive TK_SPACE
            -- tokens is impossible when optimizing whitespace
            local ntok = stoks[i + 1]
            if is_faketoken[ntok] then
              -- handle special case where a '-' cannot abut with
              -- either a short comment or a long comment
              if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
                 ptok == "TK_OP" and sinfos[prev] == "-" then
                -- keep token
              else
                settoken()  -- remove entirely
              end
            else--is_realtoken
              -- check a pair of grammar tokens, if can abut, then
              -- delete space token entirely, otherwise keep one space
              local s = checkpair(prev, i + 1)
              if s == "" then
                settoken()  -- remove entirely
              else
                settoken("TK_SPACE", " ")
              end
            end
          end
          ------------------------------------------------------------
        end
      end
    ----------------------------------------------------------------
    else
      error("unidentified token encountered")
    end
    ----------------------------------------------------------------
    i = i + 1
  end--while
  repack_tokens()
  --------------------------------------------------------------------
  -- processing loop (PASS 2)
  --------------------------------------------------------------------
  if opt_eols then
    i = 1
    -- aggressive EOL removal only works with most non-grammar tokens
    -- optimized away because it is a rather simple scheme -- basically
    -- it just checks 'real' token pairs around EOLs
    if stoks[1] == "TK_COMMENT" then
      -- first comment still existing must be shbang, skip whole line
      i = 3
    end
    while true do
      tok, info = stoks[i], sinfos[i]
      --------------------------------------------------------------
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      --------------------------------------------------------------
      elseif tok == "TK_EOL" then       -- consider each TK_EOL
        local t1, t2 = stoks[i - 1], stoks[i + 1]
        if is_realtoken[t1] and is_realtoken[t2] then  -- sanity check
          local s = checkpair(i - 1, i + 1)
          if s == "" or t2 == "TK_EOS" then
            settoken()  -- remove entirely
          end
        end
      end--if tok
      --------------------------------------------------------------
      i = i + 1
    end--while
    repack_tokens()
  end
  --------------------------------------------------------------------
  if opt_details and opt_details > 0 then print() end -- spacing
  return stoks, sinfos, stoklns
end
--end of inserted module
end

-- preload function for module optparser
preload.optparser =
function()
--start of inserted module
module "optparser"

local string = base.require "string"
local table = base.require "table"

----------------------------------------------------------------------
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter
--   frequencies according to a Linotype keyboard, plus
--   the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
--   process has no significant overhead
----------------------------------------------------------------------

local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"

-- names or identifiers that must be skipped
-- * the first two lines are for keywords
local SKIP_NAME = {}
for word in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while
self]], "%S+") do
  SKIP_NAME[word] = true
end

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

local toklist, seminfolist,             -- token lists (lexer output)
      tokpar, seminfopar, xrefpar,      -- token lists (parser output)
      globalinfo, localinfo,            -- variable information tables
      statinfo,                         -- statment type table
      globaluniq, localuniq,            -- unique name tables
      var_new,                          -- index of new variable names
      varlist                           -- list of output variables

----------------------------------------------------------------------
-- preprocess information table to get lists of unique names
-- * infotable: list of objects carrying 'name' and 'xref' fields, and
--   a 'decl' field when the object describes a local
-- * returns a table keyed by name holding the accumulated counters
--   decl (objects seen), token (xref entries), size (bytes of name text)
-- * objects with 'decl' additionally receive id, xcount and, if they
--   have more than one xref, first/last; for the others the per-name
--   entry records the object's index in 'id'
----------------------------------------------------------------------

local function preprocess(infotable)
  local uniqtable = {}
  for idx = 1, #infotable do            -- enumerate info table
    local entry = infotable[idx]
    local name = entry.name
    ------------------------------------------------------------------
    local stats = uniqtable[name]
    if not stats then                   -- first sighting of this name
      stats = { decl = 0, token = 0, size = 0, }
      uniqtable[name] = stats
    end
    ------------------------------------------------------------------
    -- accumulate declarations, tokens, size
    local refs = entry.xref
    local nrefs = #refs
    stats.decl = stats.decl + 1
    stats.token = stats.token + nrefs
    stats.size = stats.size + nrefs * #name
    ------------------------------------------------------------------
    if not entry.decl then              -- global table: add a back ref
      stats.id = idx
    else                                -- local table: first,last pairs
      entry.id = idx
      entry.xcount = nrefs
      if nrefs > 1 then                 -- ==1 means local never accessed
        entry.first = refs[2]
        entry.last = refs[nrefs]
      end
    end
    ------------------------------------------------------------------
  end--for
  return uniqtable
end

----------------------------------------------------------------------
-- calculate actual symbol frequencies, in order to reduce entropy
-- * this may help further reduce the size of compressed sources
-- * note that since parsing optimizations is put before lexing
--   optimizations, the frequency table is not exact!
-- * yes, this will miss --keep block comments too...
----------------------------------------------------------------------

-- * option: option table; only "opt-comments" is read here
-- * counts byte frequencies over the accepted token classes, with all
--   xref positions of locals blanked out first, then re-sorts LETTERS
--   and ALPHANUM so that the most frequent characters come first
local function recalc_for_entropy(option)
  local byte = string.byte
  local char = string.char
  -- table of token classes to accept in calculating symbol frequency
  local ACCEPT = {
    TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
    TK_STRING = true, TK_LSTRING = true,
  }
  if not option["opt-comments"] then
    ACCEPT.TK_COMMENT = true
    ACCEPT.TK_LCOMMENT = true
  end
  --------------------------------------------------------------------
  -- create a new table and remove any original locals by filtering
  --------------------------------------------------------------------
  local filtered = {}
  for i = 1, #toklist do
    filtered[i] = seminfolist[i]
  end
  for i = 1, #localinfo do              -- enumerate local info table
    local obj = localinfo[i]
    local xref = obj.xref
    for j = 1, obj.xcount do
      local p = xref[j]
      filtered[p] = ""                  -- remove locals
    end
  end
  --------------------------------------------------------------------
  local freq = {}                       -- reset symbol frequency table
  for i = 0, 255 do freq[i] = 0 end
  for i = 1, #toklist do                -- gather symbol frequency
    local tok, info = toklist[i], filtered[i]
    if ACCEPT[tok] then
      for j = 1, #info do
        local c = byte(info, j)
        freq[c] = freq[c] + 1
      end
    end--if
  end--for
  --------------------------------------------------------------------
  -- function to re-sort symbols according to actual frequencies
  --------------------------------------------------------------------
  local function resort(symbols)
    local symlist = {}
    for i = 1, #symbols do              -- prepare table to sort
      local c = byte(symbols, i)
      symlist[i] = { c = c, freq = freq[c], }
    end
    table.sort(symlist,                 -- sort selected symbols
      function(v1, v2)
        return v1.freq > v2.freq
      end
    )
    local charlist = {}                 -- reconstitute the string
    for i = 1, #symlist do
      charlist[i] = char(symlist[i].c)
    end
    return table.concat(charlist)
  end
  --------------------------------------------------------------------
  LETTERS = resort(LETTERS)             -- change letter arrangement
  ALPHANUM = resort(ALPHANUM)
end

----------------------------------------------------------------------
-- returns a string containing a new local variable name to use, and
-- a flag indicating whether it collides with a global variable
-- * trapping keywords and other names like 'self' is done elsewhere
-- * the name is derived from the counter var_new, which is advanced
--   by one on every call
----------------------------------------------------------------------

local function new_var_name()
  local var
  local cletters, calphanum = #LETTERS, #ALPHANUM
  local v = var_new
  if v < cletters then                  -- single char
    v = v + 1
    var = string.sub(LETTERS, v, v)
  else                                  -- longer names
    local range, sz = cletters, 1       -- calculate # chars fit
    repeat
      v = v - range
      range = range * calphanum
      sz = sz + 1
    until range > v
    local n = v % cletters              -- left side cycles faster
    v = (v - n) / cletters              -- do first char first
    n = n + 1
    var = string.sub(LETTERS, n, n)
    while sz > 1 do
      local m = v % calphanum
      v = (v - m) / calphanum
      m = m + 1
      var = var..string.sub(ALPHANUM, m, m)
      sz = sz - 1
    end
  end
  var_new = var_new + 1
  return var, globaluniq[var] ~= nil
end

----------------------------------------------------------------------
-- calculate and print some statistics
-- * probably better in main source, put here for now
-- * globaluniq, localuniq, afteruniq: per-name tables with decl, token
--   and size counters (globals, locals before, locals after renaming)
-- * option: prints nothing when option.QUIET is set; option.DETAILS
--   additionally enables the per-variable listings
----------------------------------------------------------------------

local function stats_summary(globaluniq, localuniq, afteruniq, option)
  local print = print or base.print
  local fmt = string.format
  local opt_details = option.DETAILS
  if option.QUIET then return end
  -- stats needed
  local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to,
        decl_g, decl_li, decl_lo, decl_ti, decl_to,
        token_g, token_li, token_lo, token_ti, token_to,
        size_g, size_li, size_lo, size_ti, size_to
    = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  local function avg(c, l)              -- safe average function
    if c == 0 then return 0 end
    return l / c
  end
  --------------------------------------------------------------------
  -- collect statistics (note: globals do not have declarations!)
  --------------------------------------------------------------------
  for name, uniq in base.pairs(globaluniq) do
    uniq_g = uniq_g + 1
    token_g = token_g + uniq.token
    size_g = size_g + uniq.size
  end
  for name, uniq in base.pairs(localuniq) do
    uniq_li = uniq_li + 1
    decl_li = decl_li + uniq.decl
    token_li = token_li + uniq.token
    size_li = size_li + uniq.size
  end
  for name, uniq in base.pairs(afteruniq) do
    uniq_lo = uniq_lo + 1
    decl_lo = decl_lo + uniq.decl
    token_lo = token_lo + uniq.token
    size_lo = size_lo + uniq.size
  end
  uniq_ti = uniq_g + uniq_li
  decl_ti = decl_g + decl_li
  token_ti = token_g + token_li
  size_ti = size_g + size_li
  uniq_to = uniq_g + uniq_lo
  decl_to = decl_g + decl_lo
  token_to = token_g + token_lo
  size_to = size_g + size_lo
  --------------------------------------------------------------------
  -- detailed stats: global list
  --------------------------------------------------------------------
  if opt_details then
    local sorted = {} -- sort table of unique global names by size
    for name, uniq in base.pairs(globaluniq) do
      uniq.name = name
      sorted[#sorted + 1] = uniq
    end
    table.sort(sorted,
      function(v1, v2)
        return v1.size > v2.size
      end
    )
    local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
    local hl = string.rep("-", 44)
    print("*** global variable list (sorted by size) ***\n"..hl)
    print(fmt(tabf1, "Token", "Input", "Input", "Global"))
    print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
    print(hl)
    for i = 1, #sorted do
      local uniq = sorted[i]
      print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
    end
    print(hl)
    print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
    print(hl.."\n")
    --------------------------------------------------------------------
    -- detailed stats: local list
    --------------------------------------------------------------------
    local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
    local hl = string.rep("-", 70)
    print("*** local variable list (sorted by allocation order) ***\n"..hl)
    print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
    print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
    print(hl)
    for i = 1, #varlist do  -- iterate according to order assigned
      local name = varlist[i]
      local uniq = afteruniq[name]
      local old_t, old_s = 0, 0
      for j = 1, #localinfo do  -- find corresponding old names and calculate
        local obj = localinfo[j]
        if obj.name == name then
          old_t = old_t + obj.xcount
          old_s = old_s + obj.xcount * #obj.oldname
        end
      end
      print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
                uniq.size, avg(uniq.token, uniq.size), name))
    end
    print(hl)
    print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
              size_lo, avg(token_lo, size_lo), "TOTAL"))
    print(hl.."\n")
  end--if opt_details
  --------------------------------------------------------------------
  -- display output
  --------------------------------------------------------------------
  local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
  local hl = string.rep("-", 58)
  print("*** local variable optimization summary ***\n"..hl)
  print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
  print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
  print(hl)
  print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
  print(hl)
  print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
  print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
  print(hl)
  print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
  print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
  print(hl.."\n")
end

----------------------------------------------------------------------
-- experimental optimization for f("string") statements
-- * safe to delete parentheses without adding whitespace, as both
--   kinds of strings can abut with anything else
-- * works in two steps: mark the '(' and ')' of every matching call
--   statement, then compact the parser tables and the lexer tables
----------------------------------------------------------------------

local function optimize_func1()
  ------------------------------------------------------------------
  local function is_strcall(j)          -- find f("string") pattern
    local t1 = tokpar[j + 1] or ""
    local t2 = tokpar[j + 2] or ""
    local t3 = tokpar[j + 3] or ""
    -- the middle token must be a string; comparing it against ""
    -- could never match a real call
    -- NOTE(review): "<string>" is assumed to be the parser's token
    -- text for string literals -- confirm against the parser module
    if t1 == "(" and t2 == "<string>" and t3 == ")" then
      return true
    end
  end
  ------------------------------------------------------------------
  local del_list = {}           -- scan for function pattern,
  local i = 1                   -- tokens to be deleted are marked
  while i <= #tokpar do
    local id = statinfo[i]
    if id == "call" and is_strcall(i) then  -- found & mark ()
      del_list[i + 1] = true    -- '('
      del_list[i + 3] = true    -- ')'
      i = i + 3
    end
    i = i + 1
  end
  ------------------------------------------------------------------
  -- delete a token and adjust all relevant tables
  -- * currently invalidates globalinfo and localinfo (not updated),
  --   so any other optimization is done after processing locals
  --   (of course, we can also lex the source data again...)
  -- * faster one-pass token deletion
  ------------------------------------------------------------------
  local i, dst, idend = 1, 1, #tokpar
  local del_list2 = {}          -- lexer-side positions to delete
  while dst <= idend do         -- process parser tables
    if del_list[i] then         -- found a token to delete?
      del_list2[xrefpar[i]] = true
      i = i + 1
    end
    if i > dst then
      if i <= idend then        -- shift table items lower
        tokpar[dst] = tokpar[i]
        seminfopar[dst] = seminfopar[i]
        xrefpar[dst] = xrefpar[i] - (i - dst)
        statinfo[dst] = statinfo[i]
      else                      -- nil out excess entries
        tokpar[dst] = nil
        seminfopar[dst] = nil
        xrefpar[dst] = nil
        statinfo[dst] = nil
      end
    end
    i = i + 1
    dst = dst + 1
  end
  local i, dst, idend = 1, 1, #toklist
  while dst <= idend do         -- process lexer tables
    if del_list2[i] then        -- found a token to delete?
      i = i + 1
    end
    if i > dst then
      if i <= idend then        -- shift table items lower
        toklist[dst] = toklist[i]
        seminfolist[dst] = seminfolist[i]
      else                      -- nil out excess entries
        toklist[dst] = nil
        seminfolist[dst] = nil
      end
    end
    i = i + 1
    dst = dst + 1
  end
end

----------------------------------------------------------------------
-- local variable optimization
----------------------------------------------------------------------

local function optimize_locals(option)
  var_new = 0                           -- reset variable name allocator
  varlist = {}
  ------------------------------------------------------------------
  -- preprocess global/local tables, handle entropy reduction
  ------------------------------------------------------------------
  globaluniq = preprocess(globalinfo)
  localuniq = preprocess(localinfo)
  if option["opt-entropy"] then         -- for entropy improvement
    recalc_for_entropy(option)
  end
  ------------------------------------------------------------------
  -- build initial declared object table, then sort according to
  -- token count, this might help assign more tokens to more common
  -- variable names such as 'e' thus possibly reducing entropy
  -- * an object knows its localinfo index via its 'id' field
  -- * special handling for "self" special local (parameter) here
  ------------------------------------------------------------------
  local object = {}
  for i = 1, #localinfo do
    object[i] =
localinfo[i] + end + table.sort(object, -- sort largest first + function(v1, v2) + return v1.xcount > v2.xcount + end + ) + ------------------------------------------------------------------ + -- the special "self" function parameters must be preserved + -- * the allocator below will never use "self", so it is safe to + -- keep those implicit declarations as-is + ------------------------------------------------------------------ + local temp, j, gotself = {}, 1, false + for i = 1, #object do + local obj = object[i] + if not obj.isself then + temp[j] = obj + j = j + 1 + else + gotself = true + end + end + object = temp + ------------------------------------------------------------------ + -- a simple first-come first-served heuristic name allocator, + -- note that this is in no way optimal... + -- * each object is a local variable declaration plus existence + -- * the aim is to assign short names to as many tokens as possible, + -- so the following tries to maximize name reuse + -- * note that we preserve sort order + ------------------------------------------------------------------ + local nobject = #object + while nobject > 0 do + local varname, gcollide + repeat + varname, gcollide = new_var_name() -- collect a variable name + until not SKIP_NAME[varname] -- skip all special names + varlist[#varlist + 1] = varname -- keep a list + local oleft = nobject + ------------------------------------------------------------------ + -- if variable name collides with an existing global, the name + -- cannot be used by a local when the name is accessed as a global + -- during which the local is alive (between 'act' to 'rem'), so + -- we drop objects that collides with the corresponding global + ------------------------------------------------------------------ + if gcollide then + -- find the xref table of the global + local gref = globalinfo[globaluniq[varname].id].xref + local ngref = #gref + -- enumerate for all current objects; all are valid at this point + for i = 1, 
nobject do + local obj = object[i] + local act, rem = obj.act, obj.rem -- 'live' range of local + -- if rem < 0, it is a -id to a local that had the same name + -- so follow rem to extend it; does this make sense? + while rem < 0 do + rem = localinfo[-rem].rem + end + local drop + for j = 1, ngref do + local p = gref[j] + if p >= act and p <= rem then drop = true end -- in range? + end + if drop then + obj.skip = true + oleft = oleft - 1 + end + end--for + end--if gcollide + ------------------------------------------------------------------ + -- now the first unassigned local (since it's sorted) will be the + -- one with the most tokens to rename, so we set this one and then + -- eliminate all others that collides, then any locals that left + -- can then reuse the same variable name; this is repeated until + -- all local declaration that can use this name is assigned + -- * the criteria for local-local reuse/collision is: + -- A is the local with a name already assigned + -- B is the unassigned local under consideration + -- => anytime A is accessed, it cannot be when B is 'live' + -- => to speed up things, we have first/last accesses noted + ------------------------------------------------------------------ + while oleft > 0 do + local i = 1 + while object[i].skip do -- scan for first object + i = i + 1 + end + ------------------------------------------------------------------ + -- first object is free for assignment of the variable name + -- [first,last] gives the access range for collision checking + ------------------------------------------------------------------ + oleft = oleft - 1 + local obja = object[i] + i = i + 1 + obja.newname = varname + obja.skip = true + obja.done = true + local first, last = obja.first, obja.last + local xref = obja.xref + ------------------------------------------------------------------ + -- then, scan all the rest and drop those colliding + -- if A was never accessed then it'll never collide with anything + -- otherwise trivial 
skip if: + -- * B was activated after A's last access (last < act) + -- * B was removed before A's first access (first > rem) + -- if not, see detailed skip below... + ------------------------------------------------------------------ + if first and oleft > 0 then -- must have at least 1 access + local scanleft = oleft + while scanleft > 0 do + while object[i].skip do -- next valid object + i = i + 1 + end + scanleft = scanleft - 1 + local objb = object[i] + i = i + 1 + local act, rem = objb.act, objb.rem -- live range of B + -- if rem < 0, extend range of rem thru' following local + while rem < 0 do + rem = localinfo[-rem].rem + end + -------------------------------------------------------- + if not(last < act or first > rem) then -- possible collision + -------------------------------------------------------- + -- B is activated later than A or at the same statement, + -- this means for no collision, A cannot be accessed when B + -- is alive, since B overrides A (or is a peer) + -------------------------------------------------------- + if act >= obja.act then + for j = 1, obja.xcount do -- ... then check every access + local p = xref[j] + if p >= act and p <= rem then -- A accessed when B live! 
+ oleft = oleft - 1 + objb.skip = true + break + end + end--for + -------------------------------------------------------- + -- A is activated later than B, this means for no collision, + -- A's access is okay since it overrides B, but B's last + -- access need to be earlier than A's activation time + -------------------------------------------------------- + else + if objb.last and objb.last >= obja.act then + oleft = oleft - 1 + objb.skip = true + end + end + end + -------------------------------------------------------- + if oleft == 0 then break end + end + end--if first + ------------------------------------------------------------------ + end--while + ------------------------------------------------------------------ + -- after assigning all possible locals to one variable name, the + -- unassigned locals/objects have the skip field reset and the table + -- is compacted, to hopefully reduce iteration time + ------------------------------------------------------------------ + local temp, j = {}, 1 + for i = 1, nobject do + local obj = object[i] + if not obj.done then + obj.skip = false + temp[j] = obj + j = j + 1 + end + end + object = temp -- new compacted object table + nobject = #object -- objects left to process + ------------------------------------------------------------------ + end--while + ------------------------------------------------------------------ + -- after assigning all locals with new variable names, we can + -- patch in the new names, and reprocess to get 'after' stats + ------------------------------------------------------------------ + for i = 1, #localinfo do -- enumerate all locals + local obj = localinfo[i] + local xref = obj.xref + if obj.newname then -- if got new name, patch it in + for j = 1, obj.xcount do + local p = xref[j] -- xrefs indexes the token list + seminfolist[p] = obj.newname + end + obj.name, obj.oldname -- adjust names + = obj.newname, obj.name + else + obj.oldname = obj.name -- for cases like 'self' + end + end + 
------------------------------------------------------------------ + -- deal with statistics output + ------------------------------------------------------------------ + if gotself then -- add 'self' to end of list + varlist[#varlist + 1] = "self" + end + local afteruniq = preprocess(localinfo) + stats_summary(globaluniq, localuniq, afteruniq, option) +end + + +---------------------------------------------------------------------- +-- main entry point +---------------------------------------------------------------------- + +function optimize(option, _toklist, _seminfolist, xinfo) + -- set tables + toklist, seminfolist -- from lexer + = _toklist, _seminfolist + tokpar, seminfopar, xrefpar -- from parser + = xinfo.toklist, xinfo.seminfolist, xinfo.xreflist + globalinfo, localinfo, statinfo -- from parser + = xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo + ------------------------------------------------------------------ + -- optimize locals + ------------------------------------------------------------------ + if option["opt-locals"] then + optimize_locals(option) + end + ------------------------------------------------------------------ + -- other optimizations + ------------------------------------------------------------------ + if option["opt-experimental"] then -- experimental + optimize_func1() + -- WARNING globalinfo and localinfo now invalidated! 
+ end +end +--end of inserted module +end + +-- preload function for module equiv +preload.equiv = +function() +--start of inserted module +module "equiv" + +local string = base.require "string" +local loadstring = base.loadstring +local sub = string.sub +local match = string.match +local dump = string.dump +local byte = string.byte + +--[[-------------------------------------------------------------------- +-- variable and data initialization +----------------------------------------------------------------------]] + +local is_realtoken = { -- significant (grammar) tokens + TK_KEYWORD = true, + TK_NAME = true, + TK_NUMBER = true, + TK_STRING = true, + TK_LSTRING = true, + TK_OP = true, + TK_EOS = true, +} + +local option, llex, warn + +--[[-------------------------------------------------------------------- +-- functions +----------------------------------------------------------------------]] + +------------------------------------------------------------------------ +-- initialization function +------------------------------------------------------------------------ + +function init(_option, _llex, _warn) + option = _option + llex = _llex + warn = _warn +end + +------------------------------------------------------------------------ +-- function to build lists containing a 'normal' lexer stream +------------------------------------------------------------------------ + +local function build_stream(s) + llex.init(s) + llex.llex() + local stok, sseminfo -- source list (with whitespace elements) + = llex.tok, llex.seminfo + local tok, seminfo -- processed list (real elements only) + = {}, {} + for i = 1, #stok do + local t = stok[i] + if is_realtoken[t] then + tok[#tok + 1] = t + seminfo[#seminfo + 1] = sseminfo[i] + end + end--for + return tok, seminfo +end + +------------------------------------------------------------------------ +-- test source (lexer stream) equivalence +------------------------------------------------------------------------ + +function 
source(z, dat) + -------------------------------------------------------------------- + -- function to return a dumped string for seminfo compares + -------------------------------------------------------------------- + local function dumpsem(s) + local sf = loadstring("return "..s, "z") + if sf then + return dump(sf) + end + end + -------------------------------------------------------------------- + -- mark and optionally report non-equivalence + -------------------------------------------------------------------- + local function bork(msg) + if option.DETAILS then base.print("SRCEQUIV: "..msg) end + warn.SRC_EQUIV = true + end + -------------------------------------------------------------------- + -- get lexer streams for both source strings, compare + -------------------------------------------------------------------- + local tok1, seminfo1 = build_stream(z) -- original + local tok2, seminfo2 = build_stream(dat) -- compressed + -------------------------------------------------------------------- + -- compare shbang lines ignoring EOL + -------------------------------------------------------------------- + local sh1 = match(z, "^(#[^\r\n]*)") + local sh2 = match(dat, "^(#[^\r\n]*)") + if sh1 or sh2 then + if not sh1 or not sh2 or sh1 ~= sh2 then + bork("shbang lines different") + end + end + -------------------------------------------------------------------- + -- compare by simple count + -------------------------------------------------------------------- + if #tok1 ~= #tok2 then + bork("count "..#tok1.." "..#tok2) + return + end + -------------------------------------------------------------------- + -- compare each element the best we can + -------------------------------------------------------------------- + for i = 1, #tok1 do + local t1, t2 = tok1[i], tok2[i] + local s1, s2 = seminfo1[i], seminfo2[i] + if t1 ~= t2 then -- by type + bork("type ["..i.."] "..t1.." 
"..t2) + break + end + if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then + if t1 == "TK_NAME" and option["opt-locals"] then + -- can't compare identifiers of locals that are optimized + elseif s1 ~= s2 then -- by semantic info (simple) + bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) + break + end + elseif t1 == "TK_EOS" then + -- no seminfo to compare + else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING" + -- compare 'binary' form, so dump a function + local s1b,s2b = dumpsem(s1), dumpsem(s2) + if not s1b or not s2b or s1b ~= s2b then + bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) + break + end + end + end--for + -------------------------------------------------------------------- + -- successful comparison if end is reached with no borks + -------------------------------------------------------------------- +end + +------------------------------------------------------------------------ +-- test binary chunk equivalence +------------------------------------------------------------------------ + +function binary(z, dat) + local TNIL = 0 + local TBOOLEAN = 1 + local TNUMBER = 3 + local TSTRING = 4 + -------------------------------------------------------------------- + -- mark and optionally report non-equivalence + -------------------------------------------------------------------- + local function bork(msg) + if option.DETAILS then base.print("BINEQUIV: "..msg) end + warn.BIN_EQUIV = true + end + -------------------------------------------------------------------- + -- function to remove shbang line so that loadstring runs + -------------------------------------------------------------------- + local function zap_shbang(s) + local shbang = match(s, "^(#[^\r\n]*\r?\n?)") + if shbang then -- cut out shbang + s = sub(s, #shbang + 1) + end + return s + end + -------------------------------------------------------------------- + -- attempt to compile, then dump to get binary chunk string + 
-------------------------------------------------------------------- + local cz = loadstring(zap_shbang(z), "z") + if not cz then + bork("failed to compile original sources for binary chunk comparison") + return + end + local cdat = loadstring(zap_shbang(dat), "z") + if not cdat then + bork("failed to compile compressed result for binary chunk comparison") + end + -- if loadstring() works, dump assuming string.dump() is error-free + local c1 = { i = 1, dat = dump(cz) } + c1.len = #c1.dat + local c2 = { i = 1, dat = dump(cdat) } + c2.len = #c2.dat + -------------------------------------------------------------------- + -- support functions to handle binary chunk reading + -------------------------------------------------------------------- + local endian, + sz_int, sz_sizet, -- sizes of data types + sz_inst, sz_number, + getint, getsizet + -------------------------------------------------------------------- + local function ensure(c, sz) -- check if bytes exist + if c.i + sz - 1 > c.len then return end + return true + end + -------------------------------------------------------------------- + local function skip(c, sz) -- skip some bytes + if not sz then sz = 1 end + c.i = c.i + sz + end + -------------------------------------------------------------------- + local function getbyte(c) -- return a byte value + local i = c.i + if i > c.len then return end + local d = sub(c.dat, i, i) + c.i = i + 1 + return byte(d) + end + -------------------------------------------------------------------- + local function getint_l(c) -- return an int value (little-endian) + local n, scale = 0, 1 + if not ensure(c, sz_int) then return end + for j = 1, sz_int do + n = n + scale * getbyte(c) + scale = scale * 256 + end + return n + end + -------------------------------------------------------------------- + local function getint_b(c) -- return an int value (big-endian) + local n = 0 + if not ensure(c, sz_int) then return end + for j = 1, sz_int do + n = n * 256 + getbyte(c) + end + 
return n + end + -------------------------------------------------------------------- + local function getsizet_l(c) -- return a size_t value (little-endian) + local n, scale = 0, 1 + if not ensure(c, sz_sizet) then return end + for j = 1, sz_sizet do + n = n + scale * getbyte(c) + scale = scale * 256 + end + return n + end + -------------------------------------------------------------------- + local function getsizet_b(c) -- return a size_t value (big-endian) + local n = 0 + if not ensure(c, sz_sizet) then return end + for j = 1, sz_sizet do + n = n * 256 + getbyte(c) + end + return n + end + -------------------------------------------------------------------- + local function getblock(c, sz) -- return a block (as a string) + local i = c.i + local j = i + sz - 1 + if j > c.len then return end + local d = sub(c.dat, i, j) + c.i = i + sz + return d + end + -------------------------------------------------------------------- + local function getstring(c) -- return a string + local n = getsizet(c) + if not n then return end + if n == 0 then return "" end + return getblock(c, n) + end + -------------------------------------------------------------------- + local function goodbyte(c1, c2) -- compare byte value + local b1, b2 = getbyte(c1), getbyte(c2) + if not b1 or not b2 or b1 ~= b2 then + return + end + return b1 + end + -------------------------------------------------------------------- + local function badbyte(c1, c2) -- compare byte value + local b = goodbyte(c1, c2) + if not b then return true end + end + -------------------------------------------------------------------- + local function goodint(c1, c2) -- compare int value + local i1, i2 = getint(c1), getint(c2) + if not i1 or not i2 or i1 ~= i2 then + return + end + return i1 + end + -------------------------------------------------------------------- + -- recursively-called function to compare function prototypes + -------------------------------------------------------------------- + local function 
getfunc(c1, c2) + -- source name (ignored) + if not getstring(c1) or not getstring(c2) then + bork("bad source name"); return + end + -- linedefined (ignored) + if not getint(c1) or not getint(c2) then + bork("bad linedefined"); return + end + -- lastlinedefined (ignored) + if not getint(c1) or not getint(c2) then + bork("bad lastlinedefined"); return + end + if not (ensure(c1, 4) and ensure(c2, 4)) then + bork("prototype header broken") + end + -- nups (compared) + if badbyte(c1, c2) then + bork("bad nups"); return + end + -- numparams (compared) + if badbyte(c1, c2) then + bork("bad numparams"); return + end + -- is_vararg (compared) + if badbyte(c1, c2) then + bork("bad is_vararg"); return + end + -- maxstacksize (compared) + if badbyte(c1, c2) then + bork("bad maxstacksize"); return + end + -- code (compared) + local ncode = goodint(c1, c2) + if not ncode then + bork("bad ncode"); return + end + local code1 = getblock(c1, ncode * sz_inst) + local code2 = getblock(c2, ncode * sz_inst) + if not code1 or not code2 or code1 ~= code2 then + bork("bad code block"); return + end + -- constants (compared) + local nconst = goodint(c1, c2) + if not nconst then + bork("bad nconst"); return + end + for i = 1, nconst do + local ctype = goodbyte(c1, c2) + if not ctype then + bork("bad const type"); return + end + if ctype == TBOOLEAN then + if badbyte(c1, c2) then + bork("bad boolean value"); return + end + elseif ctype == TNUMBER then + local num1 = getblock(c1, sz_number) + local num2 = getblock(c2, sz_number) + if not num1 or not num2 or num1 ~= num2 then + bork("bad number value"); return + end + elseif ctype == TSTRING then + local str1 = getstring(c1) + local str2 = getstring(c2) + if not str1 or not str2 or str1 ~= str2 then + bork("bad string value"); return + end + end + end + -- prototypes (compared recursively) + local nproto = goodint(c1, c2) + if not nproto then + bork("bad nproto"); return + end + for i = 1, nproto do + if not getfunc(c1, c2) then + bork("bad 
function prototype"); return + end + end + -- debug information (ignored) + -- lineinfo (ignored) + local sizelineinfo1 = getint(c1) + if not sizelineinfo1 then + bork("bad sizelineinfo1"); return + end + local sizelineinfo2 = getint(c2) + if not sizelineinfo2 then + bork("bad sizelineinfo2"); return + end + if not getblock(c1, sizelineinfo1 * sz_int) then + bork("bad lineinfo1"); return + end + if not getblock(c2, sizelineinfo2 * sz_int) then + bork("bad lineinfo2"); return + end + -- locvars (ignored) + local sizelocvars1 = getint(c1) + if not sizelocvars1 then + bork("bad sizelocvars1"); return + end + local sizelocvars2 = getint(c2) + if not sizelocvars2 then + bork("bad sizelocvars2"); return + end + for i = 1, sizelocvars1 do + if not getstring(c1) or not getint(c1) or not getint(c1) then + bork("bad locvars1"); return + end + end + for i = 1, sizelocvars2 do + if not getstring(c2) or not getint(c2) or not getint(c2) then + bork("bad locvars2"); return + end + end + -- upvalues (ignored) + local sizeupvalues1 = getint(c1) + if not sizeupvalues1 then + bork("bad sizeupvalues1"); return + end + local sizeupvalues2 = getint(c2) + if not sizeupvalues2 then + bork("bad sizeupvalues2"); return + end + for i = 1, sizeupvalues1 do + if not getstring(c1) then bork("bad upvalues1"); return end + end + for i = 1, sizeupvalues2 do + if not getstring(c2) then bork("bad upvalues2"); return end + end + return true + end + -------------------------------------------------------------------- + -- parse binary chunks to verify equivalence + -- * for headers, handle sizes to allow a degree of flexibility + -- * assume a valid binary chunk is generated, since it was not + -- generated via external means + -------------------------------------------------------------------- + if not (ensure(c1, 12) and ensure(c2, 12)) then + bork("header broken") + end + skip(c1, 6) -- skip signature(4), version, format + endian = getbyte(c1) -- 1 = little endian + sz_int = getbyte(c1) -- get 
data type sizes + sz_sizet = getbyte(c1) + sz_inst = getbyte(c1) + sz_number = getbyte(c1) + skip(c1) -- skip integral flag + skip(c2, 12) -- skip other header (assume similar) + if endian == 1 then -- set for endian sensitive data we need + getint = getint_l + getsizet = getsizet_l + else + getint = getint_b + getsizet = getsizet_b + end + getfunc(c1, c2) -- get prototype at root + if c1.i ~= c1.len + 1 then + bork("inconsistent binary chunk1"); return + elseif c2.i ~= c2.len + 1 then + bork("inconsistent binary chunk2"); return + end + -------------------------------------------------------------------- + -- successful comparison if end is reached with no borks + -------------------------------------------------------------------- +end +--end of inserted module +end + +-- preload function for module plugin/html +preload["plugin/html"] = +function() +--start of inserted module +module "plugin/html" + +local string = base.require "string" +local table = base.require "table" +local io = base.require "io" + +------------------------------------------------------------------------ +-- constants and configuration +------------------------------------------------------------------------ + +local HTML_EXT = ".html" +local ENTITIES = { + ["&"] = "&", ["<"] = "<", [">"] = ">", + ["'"] = "'", ["\""] = """, +} + +-- simple headers and footers +local HEADER = [[ + + + +%s + + + + +
+]]
+local FOOTER = [[
+
+ + +]] +-- for more, please see wikimain.css from the Lua wiki site +local STYLESHEET = [[ +BODY { + background: white; + color: navy; +} +pre.code { color: black; } +span.comment { color: #00a000; } +span.string { color: #009090; } +span.keyword { color: black; font-weight: bold; } +span.number { color: #993399; } +span.operator { } +span.name { } +span.global { color: #ff0000; font-weight: bold; } +span.local { color: #0000ff; font-weight: bold; } +]] + +------------------------------------------------------------------------ +-- option handling, plays nice with --quiet option +------------------------------------------------------------------------ + +local option -- local reference to list of options +local srcfl, destfl -- filenames +local toklist, seminfolist, toklnlist -- token data + +local function print(...) -- handle quiet option + if option.QUIET then return end + base.print(...) +end + +------------------------------------------------------------------------ +-- initialization +------------------------------------------------------------------------ + +function init(_option, _srcfl, _destfl) + option = _option + srcfl = _srcfl + local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") + local basename, extension = srcfl, "" + if extb and extb > 1 then + basename = string.sub(srcfl, 1, extb - 1) + extension = string.sub(srcfl, extb, exte) + end + destfl = basename..HTML_EXT + if option.OUTPUT_FILE then + destfl = option.OUTPUT_FILE + end + if srcfl == destfl then + base.error("output filename identical to input filename") + end +end + +------------------------------------------------------------------------ +-- message display, post-load processing +------------------------------------------------------------------------ + +function post_load(z) + print([[ +HTML plugin module for LuaSrcDiet +]]) + print("Exporting: "..srcfl.." 
-> "..destfl.."\n") +end + +------------------------------------------------------------------------ +-- post-lexing processing, can work on lexer table output +------------------------------------------------------------------------ + +function post_lex(_toklist, _seminfolist, _toklnlist) + toklist, seminfolist, toklnlist + = _toklist, _seminfolist, _toklnlist +end + +------------------------------------------------------------------------ +-- escape the usual suspects for HTML/XML +------------------------------------------------------------------------ + +local function do_entities(z) + local i = 1 + while i <= #z do + local c = string.sub(z, i, i) + local d = ENTITIES[c] + if d then + c = d + z = string.sub(z, 1, i - 1)..c..string.sub(z, i + 1) + end + i = i + #c + end--while + return z +end + +------------------------------------------------------------------------ +-- save source code to file +------------------------------------------------------------------------ + +local function save_file(fname, dat) + local OUTF = io.open(fname, "wb") + if not OUTF then base.error("cannot open \""..fname.."\" for writing") end + local status = OUTF:write(dat) + if not status then base.error("cannot write to \""..fname.."\"") end + OUTF:close() +end + +------------------------------------------------------------------------ +-- post-parsing processing, gives globalinfo, localinfo +------------------------------------------------------------------------ + +function post_parse(globalinfo, localinfo) + local html = {} + local function add(s) -- html helpers + html[#html + 1] = s + end + local function span(class, s) + add(''..s..'') + end + ---------------------------------------------------------------------- + for i = 1, #globalinfo do -- mark global identifiers as TK_GLOBAL + local obj = globalinfo[i] + local xref = obj.xref + for j = 1, #xref do + local p = xref[j] + toklist[p] = "TK_GLOBAL" + end + end--for + 
---------------------------------------------------------------------- + for i = 1, #localinfo do -- mark local identifiers as TK_LOCAL + local obj = localinfo[i] + local xref = obj.xref + for j = 1, #xref do + local p = xref[j] + toklist[p] = "TK_LOCAL" + end + end--for + ---------------------------------------------------------------------- + add(string.format(HEADER, -- header and leading stuff + do_entities(srcfl), + STYLESHEET)) + for i = 1, #toklist do -- enumerate token list + local tok, info = toklist[i], seminfolist[i] + if tok == "TK_KEYWORD" then + span("keyword", info) + elseif tok == "TK_STRING" or tok == "TK_LSTRING" then + span("string", do_entities(info)) + elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then + span("comment", do_entities(info)) + elseif tok == "TK_GLOBAL" then + span("global", info) + elseif tok == "TK_LOCAL" then + span("local", info) + elseif tok == "TK_NAME" then + span("name", info) + elseif tok == "TK_NUMBER" then + span("number", info) + elseif tok == "TK_OP" then + span("operator", do_entities(info)) + elseif tok ~= "TK_EOS" then -- TK_EOL, TK_SPACE + add(info) + end + end--for + add(FOOTER) + save_file(destfl, table.concat(html)) + option.EXIT = true +end +--end of inserted module +end + +-- preload function for module plugin/sloc +preload["plugin/sloc"] = +function() +--start of inserted module +module "plugin/sloc" + +local string = base.require "string" +local table = base.require "table" + +------------------------------------------------------------------------ +-- initialization +------------------------------------------------------------------------ + +local option -- local reference to list of options +local srcfl -- source file name + +function init(_option, _srcfl, _destfl) + option = _option + option.QUIET = true + srcfl = _srcfl +end + +------------------------------------------------------------------------ +-- splits a block into a table of lines (minus EOLs) 
+------------------------------------------------------------------------ + +local function split(blk) + local lines = {} + local i, nblk = 1, #blk + while i <= nblk do + local p, q, r, s = string.find(blk, "([\r\n])([\r\n]?)", i) + if not p then + p = nblk + 1 + end + lines[#lines + 1] = string.sub(blk, i, p - 1) + i = p + 1 + if p < nblk and q > p and r ~= s then -- handle Lua-style CRLF, LFCR + i = i + 1 + end + end + return lines +end + +------------------------------------------------------------------------ +-- post-lexing processing, can work on lexer table output +------------------------------------------------------------------------ + +function post_lex(toklist, seminfolist, toklnlist) + local lnow, sloc = 0, 0 + local function chk(ln) -- if a new line, count it as an SLOC + if ln > lnow then -- new line # must be > old line # + sloc = sloc + 1; lnow = ln + end + end + for i = 1, #toklist do -- enumerate over all tokens + local tok, info, ln + = toklist[i], seminfolist[i], toklnlist[i] + -------------------------------------------------------------------- + if tok == "TK_KEYWORD" or tok == "TK_NAME" or -- significant + tok == "TK_NUMBER" or tok == "TK_OP" then + chk(ln) + -------------------------------------------------------------------- + -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop + -- is needed in order to mark off lines one-by-one. Since llex.lua + -- currently returns the line number of the last part of the string, + -- we must subtract in order to get the starting line number. 
+ -------------------------------------------------------------------- + elseif tok == "TK_STRING" then -- possible multi-line + local t = split(info) + ln = ln - #t + 1 + for j = 1, #t do + chk(ln); ln = ln + 1 + end + -------------------------------------------------------------------- + elseif tok == "TK_LSTRING" then -- possible multi-line + local t = split(info) + ln = ln - #t + 1 + for j = 1, #t do + if t[j] ~= "" then chk(ln) end + ln = ln + 1 + end + -------------------------------------------------------------------- + -- other tokens are comments or whitespace and are ignored + -------------------------------------------------------------------- + end + end--for + base.print(srcfl..": "..sloc) -- display result + option.EXIT = true +end +--end of inserted module +end + +-- support modules +local llex = require "llex" +local lparser = require "lparser" +local optlex = require "optlex" +local optparser = require "optparser" +local equiv = require "equiv" +local plugin + +--[[-------------------------------------------------------------------- +-- messages and textual data +----------------------------------------------------------------------]] + +local MSG_TITLE = [[ +LuaSrcDiet: Puts your Lua 5.1 source code on a diet +Version 0.12.1 (20120407) Copyright (c) 2012 Kein-Hong Man +The COPYRIGHT file describes the conditions under which this +software may be distributed. 
+]] + +local MSG_USAGE = [[ +usage: LuaSrcDiet [options] [filenames] + +example: + >LuaSrcDiet myscript.lua -o myscript_.lua + +options: + -v, --version prints version information + -h, --help prints usage information + -o specify file name to write output + -s suffix for output files (default '_') + --keep keep block comment with inside + --plugin run in plugin/ directory + - stop handling arguments + + (optimization levels) + --none all optimizations off (normalizes EOLs only) + --basic lexer-based optimizations only + --maximum maximize reduction of source + + (informational) + --quiet process files quietly + --read-only read file and print token stats only + --dump-lexer dump raw tokens from lexer to stdout + --dump-parser dump variable tracking tables from parser + --details extra info (strings, numbers, locals) + +features (to disable, insert 'no' prefix like --noopt-comments): +%s +default settings: +%s]] + +------------------------------------------------------------------------ +-- optimization options, for ease of switching on and off +-- * positive to enable optimization, negative (no) to disable +-- * these options should follow --opt-* and --noopt-* style for now +------------------------------------------------------------------------ + +local OPTION = [[ +--opt-comments,'remove comments and block comments' +--opt-whitespace,'remove whitespace excluding EOLs' +--opt-emptylines,'remove empty lines' +--opt-eols,'all above, plus remove unnecessary EOLs' +--opt-strings,'optimize strings and long strings' +--opt-numbers,'optimize numbers' +--opt-locals,'optimize local variable names' +--opt-entropy,'tries to reduce symbol entropy of locals' +--opt-srcequiv,'insist on source (lexer stream) equivalence' +--opt-binequiv,'insist on binary chunk equivalence' +--opt-experimental,'apply experimental optimizations' +]] + +-- preset configuration +local DEFAULT_CONFIG = [[ + --opt-comments --opt-whitespace --opt-emptylines + --opt-numbers --opt-locals + 
--opt-srcequiv --opt-binequiv +]] +-- override configurations +-- * MUST explicitly enable/disable everything for +-- total option replacement +local BASIC_CONFIG = [[ + --opt-comments --opt-whitespace --opt-emptylines + --noopt-eols --noopt-strings --noopt-numbers + --noopt-locals --noopt-entropy + --opt-srcequiv --opt-binequiv +]] +local MAXIMUM_CONFIG = [[ + --opt-comments --opt-whitespace --opt-emptylines + --opt-eols --opt-strings --opt-numbers + --opt-locals --opt-entropy + --opt-srcequiv --opt-binequiv +]] +local NONE_CONFIG = [[ + --noopt-comments --noopt-whitespace --noopt-emptylines + --noopt-eols --noopt-strings --noopt-numbers + --noopt-locals --noopt-entropy + --opt-srcequiv --opt-binequiv +]] + +local DEFAULT_SUFFIX = "_" -- default suffix for file renaming +local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins + +--[[-------------------------------------------------------------------- +-- startup and initialize option list handling +----------------------------------------------------------------------]] + +-- simple error message handler; change to error if traceback wanted +local function die(msg) + print("LuaSrcDiet (error): "..msg); os.exit(1) +end +--die = error--DEBUG + +if not match(_VERSION, "5.1", 1, 1) then -- sanity check + die("requires Lua 5.1 to run") +end + +------------------------------------------------------------------------ +-- prepares text for list of optimizations, prepare lookup table +------------------------------------------------------------------------ + +local MSG_OPTIONS = "" +do + local WIDTH = 24 + local o = {} + for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do + local msg = " "..op + msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n" + MSG_OPTIONS = MSG_OPTIONS..msg + o[op] = true + o["--no"..sub(op, 3)] = true + end + OPTION = o -- replace OPTION with lookup table +end + +MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG) + +if p_embedded then -- embedded plugins + local 
EMBED_INFO = "\nembedded plugins:\n" + for i = 1, #p_embedded do + local p = p_embedded[i] + EMBED_INFO = EMBED_INFO.." "..plugin_info[p].."\n" + end + MSG_USAGE = MSG_USAGE..EMBED_INFO +end + +------------------------------------------------------------------------ +-- global variable initialization, option set handling +------------------------------------------------------------------------ + +local suffix = DEFAULT_SUFFIX -- file suffix +local option = {} -- program options +local stat_c, stat_l -- statistics tables + +-- function to set option lookup table based on a text list of options +-- note: additional forced settings for --opt-eols is done in optlex.lua +local function set_options(CONFIG) + for op in gmatch(CONFIG, "(%-%-%S+)") do + if sub(op, 3, 4) == "no" and -- handle negative options + OPTION["--"..sub(op, 5)] then + option[sub(op, 5)] = false + else + option[sub(op, 3)] = true + end + end +end + +--[[-------------------------------------------------------------------- +-- support functions +----------------------------------------------------------------------]] + +-- list of token types, parser-significant types are up to TTYPE_GRAMMAR +-- while the rest are not used by parsers; arranged for stats display +local TTYPES = { + "TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar + "TK_STRING", "TK_LSTRING", "TK_OP", + "TK_EOS", + "TK_COMMENT", "TK_LCOMMENT", -- non-grammar + "TK_EOL", "TK_SPACE", +} +local TTYPE_GRAMMAR = 7 + +local EOLTYPES = { -- EOL names for token dump + ["\n"] = "LF", ["\r"] = "CR", + ["\n\r"] = "LFCR", ["\r\n"] = "CRLF", +} + +------------------------------------------------------------------------ +-- read source code from file +------------------------------------------------------------------------ +local loaded_file_contents = "" +local saved_file_contents = "" + +local function load_file(fname) + local INF = io.open(fname, "rb") + if not INF then die('cannot open "'..fname..'" for reading') end + local dat = INF:read("*a") + 
if not dat then die('cannot read from "'..fname..'"') end + INF:close() + loaded_file_contents = dat + return dat +end + +------------------------------------------------------------------------ +-- save source code to file +------------------------------------------------------------------------ + +local function save_file(fname, dat) + saved_file_contents = dat +--[[ + local OUTF = io.open(fname, "wb") + if not OUTF then die('cannot open "'..fname..'" for writing') end + local status = OUTF:write(dat) + if not status then die('cannot write to "'..fname..'"') end + OUTF:close() + ]] +end + +------------------------------------------------------------------------ +-- functions to deal with statistics +------------------------------------------------------------------------ + +-- initialize statistics table +local function stat_init() + stat_c, stat_l = {}, {} + for i = 1, #TTYPES do + local ttype = TTYPES[i] + stat_c[ttype], stat_l[ttype] = 0, 0 + end +end + +-- add a token to statistics table +local function stat_add(tok, seminfo) + stat_c[tok] = stat_c[tok] + 1 + stat_l[tok] = stat_l[tok] + #seminfo +end + +-- do totals for statistics table, return average table +local function stat_calc() + local function avg(c, l) -- safe average function + if c == 0 then return 0 end + return l / c + end + local stat_a = {} + local c, l = 0, 0 + for i = 1, TTYPE_GRAMMAR do -- total grammar tokens + local ttype = TTYPES[i] + c = c + stat_c[ttype]; l = l + stat_l[ttype] + end + stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l + stat_a.TOTAL_TOK = avg(c, l) + c, l = 0, 0 + for i = 1, #TTYPES do -- total all tokens + local ttype = TTYPES[i] + c = c + stat_c[ttype]; l = l + stat_l[ttype] + stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype]) + end + stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l + stat_a.TOTAL_ALL = avg(c, l) + return stat_a +end + +--[[-------------------------------------------------------------------- +-- main tasks 
+----------------------------------------------------------------------]] + +------------------------------------------------------------------------ +-- a simple token dumper, minimal translation of seminfo data +------------------------------------------------------------------------ + +local function dump_tokens(srcfl) + -------------------------------------------------------------------- + -- load file and process source input into tokens + -------------------------------------------------------------------- + local z = load_file(srcfl) + llex.init(z) + llex.llex() + local toklist, seminfolist = llex.tok, llex.seminfo + -------------------------------------------------------------------- + -- display output + -------------------------------------------------------------------- + for i = 1, #toklist do + local tok, seminfo = toklist[i], seminfolist[i] + if tok == "TK_OP" and string.byte(seminfo) < 32 then + seminfo = "(".. string.byte(seminfo)..")" + elseif tok == "TK_EOL" then + seminfo = EOLTYPES[seminfo] + else + seminfo = "'"..seminfo.."'" + end + print(tok.." 
"..seminfo) + end--for +end + +---------------------------------------------------------------------- +-- parser dump; dump globalinfo and localinfo tables +---------------------------------------------------------------------- + +local function dump_parser(srcfl) + local print = print + -------------------------------------------------------------------- + -- load file and process source input into tokens + -------------------------------------------------------------------- + local z = load_file(srcfl) + llex.init(z) + llex.llex() + local toklist, seminfolist, toklnlist + = llex.tok, llex.seminfo, llex.tokln + -------------------------------------------------------------------- + -- do parser optimization here + -------------------------------------------------------------------- + lparser.init(toklist, seminfolist, toklnlist) + local xinfo = lparser.parser() + local globalinfo, localinfo = + xinfo.globalinfo, xinfo.localinfo + -------------------------------------------------------------------- + -- display output + -------------------------------------------------------------------- + local hl = string.rep("-", 72) + print("*** Local/Global Variable Tracker Tables ***") + print(hl.."\n GLOBALS\n"..hl) + -- global tables have a list of xref numbers only + for i = 1, #globalinfo do + local obj = globalinfo[i] + local msg = "("..i..") '"..obj.name.."' -> " + local xref = obj.xref + for j = 1, #xref do msg = msg..xref[j].." " end + print(msg) + end + -- local tables have xref numbers and a few other special + -- numbers that are specially named: decl (declaration xref), + -- act (activation xref), rem (removal xref) + print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl) + for i = 1, #localinfo do + local obj = localinfo[i] + local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl.. + " act:"..obj.act.." rem:"..obj.rem + if obj.isself then + msg = msg.." isself" + end + msg = msg.." 
-> " + local xref = obj.xref + for j = 1, #xref do msg = msg..xref[j].." " end + print(msg) + end + print(hl.."\n") +end + +------------------------------------------------------------------------ +-- reads source file(s) and reports some statistics +------------------------------------------------------------------------ + +local function read_only(srcfl) + local print = print + -------------------------------------------------------------------- + -- load file and process source input into tokens + -------------------------------------------------------------------- + local z = load_file(srcfl) + llex.init(z) + llex.llex() + local toklist, seminfolist = llex.tok, llex.seminfo + print(MSG_TITLE) + print("Statistics for: "..srcfl.."\n") + -------------------------------------------------------------------- + -- collect statistics + -------------------------------------------------------------------- + stat_init() + for i = 1, #toklist do + local tok, seminfo = toklist[i], seminfolist[i] + stat_add(tok, seminfo) + end--for + local stat_a = stat_calc() + -------------------------------------------------------------------- + -- display output + -------------------------------------------------------------------- + local fmt = string.format + local function figures(tt) + return stat_c[tt], stat_l[tt], stat_a[tt] + end + local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f" + local hl = string.rep("-", 42) + print(fmt(tabf1, "Lexical", "Input", "Input", "Input")) + print(fmt(tabf1, "Elements", "Count", "Bytes", "Average")) + print(hl) + for i = 1, #TTYPES do + local ttype = TTYPES[i] + print(fmt(tabf2, ttype, figures(ttype))) + if ttype == "TK_EOS" then print(hl) end + end + print(hl) + print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) + print(hl) + print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) + print(hl.."\n") +end + +------------------------------------------------------------------------ +-- process source file(s), write output and reports 
some statistics +------------------------------------------------------------------------ + +local function process_file(srcfl, destfl) + local function print(...) -- handle quiet option + if option.QUIET then return end + _G.print(...) + end + if plugin and plugin.init then -- plugin init + option.EXIT = false + plugin.init(option, srcfl, destfl) + if option.EXIT then return end + end + print(MSG_TITLE) -- title message + -------------------------------------------------------------------- + -- load file and process source input into tokens + -------------------------------------------------------------------- + local z = load_file(srcfl) + if plugin and plugin.post_load then -- plugin post-load + z = plugin.post_load(z) or z + if option.EXIT then return end + end + llex.init(z) + llex.llex() + local toklist, seminfolist, toklnlist + = llex.tok, llex.seminfo, llex.tokln + if plugin and plugin.post_lex then -- plugin post-lex + plugin.post_lex(toklist, seminfolist, toklnlist) + if option.EXIT then return end + end + -------------------------------------------------------------------- + -- collect 'before' statistics + -------------------------------------------------------------------- + stat_init() + for i = 1, #toklist do + local tok, seminfo = toklist[i], seminfolist[i] + stat_add(tok, seminfo) + end--for + local stat1_a = stat_calc() + local stat1_c, stat1_l = stat_c, stat_l + -------------------------------------------------------------------- + -- do parser optimization here + -------------------------------------------------------------------- + optparser.print = print -- hack + lparser.init(toklist, seminfolist, toklnlist) + local xinfo = lparser.parser() + if plugin and plugin.post_parse then -- plugin post-parse + plugin.post_parse(xinfo.globalinfo, xinfo.localinfo) + if option.EXIT then return end + end + optparser.optimize(option, toklist, seminfolist, xinfo) + if plugin and plugin.post_optparse then -- plugin post-optparse + plugin.post_optparse() + if 
option.EXIT then return end + end + -------------------------------------------------------------------- + -- do lexer optimization here, save output file + -------------------------------------------------------------------- + local warn = optlex.warn -- use this as a general warning lookup + optlex.print = print -- hack + toklist, seminfolist, toklnlist + = optlex.optimize(option, toklist, seminfolist, toklnlist) + if plugin and plugin.post_optlex then -- plugin post-optlex + plugin.post_optlex(toklist, seminfolist, toklnlist) + if option.EXIT then return end + end + local dat = table.concat(seminfolist) + -- depending on options selected, embedded EOLs in long strings and + -- long comments may not have been translated to \n, tack a warning + if string.find(dat, "\r\n", 1, 1) or + string.find(dat, "\n\r", 1, 1) then + warn.MIXEDEOL = true + end + -------------------------------------------------------------------- + -- test source and binary chunk equivalence + -------------------------------------------------------------------- + equiv.init(option, llex, warn) + equiv.source(z, dat) + equiv.binary(z, dat) + local smsg = "before and after lexer streams are NOT equivalent!" + local bmsg = "before and after binary chunks are NOT equivalent!" 
+ -- for reporting, die if option was selected, else just warn + if warn.SRC_EQUIV then + if option["opt-srcequiv"] then die(smsg) end + else + print("*** SRCEQUIV: token streams are sort of equivalent") + if option["opt-locals"] then + print("(but no identifier comparisons since --opt-locals enabled)") + end + print() + end + if warn.BIN_EQUIV then + if option["opt-binequiv"] then die(bmsg) end + else + print("*** BINEQUIV: binary chunks are sort of equivalent") + print() + end + -------------------------------------------------------------------- + -- save optimized source stream to output file + -------------------------------------------------------------------- + save_file(destfl, dat) + -------------------------------------------------------------------- + -- collect 'after' statistics + -------------------------------------------------------------------- + stat_init() + for i = 1, #toklist do + local tok, seminfo = toklist[i], seminfolist[i] + stat_add(tok, seminfo) + end--for + local stat_a = stat_calc() + -------------------------------------------------------------------- + -- display output + -------------------------------------------------------------------- + print("Statistics for: "..srcfl.." 
-> "..destfl.."\n") + local fmt = string.format + local function figures(tt) + return stat1_c[tt], stat1_l[tt], stat1_a[tt], + stat_c[tt], stat_l[tt], stat_a[tt] + end + local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s", + "%-16s%8d%8d%10.2f%8d%8d%10.2f" + local hl = string.rep("-", 68) + print("*** lexer-based optimizations summary ***\n"..hl) + print(fmt(tabf1, "Lexical", + "Input", "Input", "Input", + "Output", "Output", "Output")) + print(fmt(tabf1, "Elements", + "Count", "Bytes", "Average", + "Count", "Bytes", "Average")) + print(hl) + for i = 1, #TTYPES do + local ttype = TTYPES[i] + print(fmt(tabf2, ttype, figures(ttype))) + if ttype == "TK_EOS" then print(hl) end + end + print(hl) + print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) + print(hl) + print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) + print(hl) + -------------------------------------------------------------------- + -- report warning flags from optimizing process + -------------------------------------------------------------------- + if warn.LSTRING then + print("* WARNING: "..warn.LSTRING) + elseif warn.MIXEDEOL then + print("* WARNING: ".."output still contains some CRLF or LFCR line endings") + elseif warn.SRC_EQUIV then + print("* WARNING: "..smsg) + elseif warn.BIN_EQUIV then + print("* WARNING: "..bmsg) + end + print() +end + +--[[-------------------------------------------------------------------- +-- main functions +----------------------------------------------------------------------]] + +--[[ +local arg = {...} -- program arguments +local fspec = {} +set_options(DEFAULT_CONFIG) -- set to default options at beginning + +------------------------------------------------------------------------ +-- per-file handling, ship off to tasks +------------------------------------------------------------------------ + +local function do_files(fspec) + for i = 1, #fspec do + local srcfl = fspec[i] + local destfl + ------------------------------------------------------------------ + -- 
find and replace extension for filenames + ------------------------------------------------------------------ + local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") + local basename, extension = srcfl, "" + if extb and extb > 1 then + basename = sub(srcfl, 1, extb - 1) + extension = sub(srcfl, extb, exte) + end + destfl = basename..suffix..extension + if #fspec == 1 and option.OUTPUT_FILE then + destfl = option.OUTPUT_FILE + end + if srcfl == destfl then + die("output filename identical to input filename") + end + ------------------------------------------------------------------ + -- perform requested operations + ------------------------------------------------------------------ + if option.DUMP_LEXER then + dump_tokens(srcfl) + elseif option.DUMP_PARSER then + dump_parser(srcfl) + elseif option.READ_ONLY then + read_only(srcfl) + else + process_file(srcfl, destfl) + end + end--for +end + +------------------------------------------------------------------------ +-- main function (entry point is after this definition) +------------------------------------------------------------------------ + +local function main() + local argn, i = #arg, 1 + if argn == 0 then + option.HELP = true + end + -------------------------------------------------------------------- + -- handle arguments + -------------------------------------------------------------------- + while i <= argn do + local o, p = arg[i], arg[i + 1] + local dash = match(o, "^%-%-?") + if dash == "-" then -- single-dash options + if o == "-h" then + option.HELP = true; break + elseif o == "-v" then + option.VERSION = true; break + elseif o == "-s" then + if not p then die("-s option needs suffix specification") end + suffix = p + i = i + 1 + elseif o == "-o" then + if not p then die("-o option needs a file name") end + option.OUTPUT_FILE = p + i = i + 1 + elseif o == "-" then + break -- ignore rest of args + else + die("unrecognized option "..o) + end + elseif dash == "--" then -- double-dash options + if o == 
"--help" then + option.HELP = true; break + elseif o == "--version" then + option.VERSION = true; break + elseif o == "--keep" then + if not p then die("--keep option needs a string to match for") end + option.KEEP = p + i = i + 1 + elseif o == "--plugin" then + if not p then die("--plugin option needs a module name") end + if option.PLUGIN then die("only one plugin can be specified") end + option.PLUGIN = p + plugin = require(PLUGIN_SUFFIX..p) + i = i + 1 + elseif o == "--quiet" then + option.QUIET = true + elseif o == "--read-only" then + option.READ_ONLY = true + elseif o == "--basic" then + set_options(BASIC_CONFIG) + elseif o == "--maximum" then + set_options(MAXIMUM_CONFIG) + elseif o == "--none" then + set_options(NONE_CONFIG) + elseif o == "--dump-lexer" then + option.DUMP_LEXER = true + elseif o == "--dump-parser" then + option.DUMP_PARSER = true + elseif o == "--details" then + option.DETAILS = true + elseif OPTION[o] then -- lookup optimization options + set_options(o) + else + die("unrecognized option "..o) + end + else + fspec[#fspec + 1] = o -- potential filename + end + i = i + 1 + end--while + if option.HELP then + print(MSG_TITLE..MSG_USAGE); return true + elseif option.VERSION then + print(MSG_TITLE); return true + end + if #fspec > 0 then + if #fspec > 1 and option.OUTPUT_FILE then + die("with -o, only one source file can be specified") + end + do_files(fspec) + return true + else + die("nothing to do!") + end +end + +-- entry point -> main() -> do_files() +if not main() then + die("Please run with option -h or --help for usage information") +end +]] +-- end of script + +-- Start of premake4 customizations +function get_slim_luasrc(fname) + set_options(DEFAULT_CONFIG) + set_options(MAXIMUM_CONFIG) + option.OUTPUT_FILE = "-" + option.QUIET = true + process_file(fname, "-") + return saved_file_contents or "", loaded_file_contents +end diff --git a/scripts/luasrcdiet/README.LuaSrcDiet b/scripts/luasrcdiet/README.LuaSrcDiet new file mode 100644 index 
00000000..950ceffc --- /dev/null +++ b/scripts/luasrcdiet/README.LuaSrcDiet @@ -0,0 +1,140 @@ + + LuaSrcDiet + Compresses Lua source code by removing unnecessary characters. + + Copyright (c) 2005-2008,2011,2012 Kein-Hong Man + The COPYRIGHT file describes the conditions + under which this software may be distributed. + + http://code.google.com/p/luasrcdiet/ + +======================================================================== + +WHAT'S NEW IN VERSION 0.12.1 +---------------------------- + +* Fixed a long comment glitch when using the --keep option. (Two + extra characters were duplicated before the ending brackets.) + +* Faster function call syntax sugar optimization using a one-pass + token deletion loop. + +WHAT'S NEW IN VERSION 0.12.0 +---------------------------- + +* Added single-file versions of LuaSrcDiet in various sizes. First + done by some other projects that packaged LuaSrcDiet, e.g. eLua. + +* BUG FIX: String optimization of "\ddd" type escape mechanism, + "\00101" was incorrectly optimized to "\101". + +* --opt-srcequiv: Source equivalence checking. Tries hard to compare + 'before' and 'after' lexer token streams for equivalence. + +* --opt-binequiv: Binary chunk equivalence checking. Tries hard to + compare 'before' and 'after' binary chunks for equivalence. + +* When using --opt-eols, the last EOL character is now removed. + +* --opt-experimental: Turns on a few experimental optimizations: + (a) ';' operator removal (deleted or turned into whitespace). + (b) f("string") f('string') f([[string]]) calls are turned + into their syntactic sugar equivalents, e.g. f"string" + +* Plugins are now embedded into single-file versions. + +* First release of completed documentation files. + +* New Makefile and numerous minor updates. + +* Old code for Lua 5.0 removed. + +BUGS +---- + +* Nothing in my list. See below for limitations... 
+ +INCOMPLETE SUPPORT +------------------ + +* Locals optimization does NOT understand implicit 'arg' locals in + vararg functions (see option LUA_COMPAT_VARARG in the Lua sources). + +* NO support in lexer for decimal points other than '.'. + +* NO support in lexer for Lua 5.0.x nested long strings. + +EXPERIMENTAL SOFTWARE +--------------------- + +LuaSrcDiet is "experimental software". For LuaSrcDiet, this means that +it was coded for one user -- the coder. Although I may be able to help +LuaSrcDiet users, there should not be any expectation of 'support'. + +Don't hook this thing up to nuclear missiles. + +I don't have the time for steady maintenance or for building up and +cultivating a user base, so developers are welcome to fork LuaSrcDiet or +incorporate it into their own software, as long as authorship +attribution for LuaSrcDiet source code is maintained. Say if LuaSrcDiet +is called as a separate program, then it is simply an aggregation of +separate software and each program should stick to its own license. + +Programs you process using LuaSrcDiet are of course not affected at all +by LuaSrcDiet's license; it's just a text filter. See COPYRIGHT. If you +insist on extreme COPYRIGHT views, then better delete this whole thing +right away, then gouge your eyes out. :-p + +OLDER STUFF +----------- + +There has been some slash-and-burn going on. I'm inclined to move +forward, and not spend time maintaining older stuff forever. If you +still need the older stuff, they can be found in: + +* Lua 5.0.x old versions: last seen in version 0.11.2. + +* Lua 5.1.x old codebase: last seen in version 0.11.2. + +FUTURE PLANS +------------ + +Lua 5.1.x releases for LuaSrcDiet will pretty much stagnate at 0.12.1 +after implementation of a couple more experimental optimizations, and +effort will be shifted towards something for Lua 5.2.x. The timeline for +this is indeterminate. 
+ +======================================================================== + +USING LUASRCDIET + +Now is a good time to take a look at the documentation. Start with +LuaSrcDiet.html in the doc directory. + +LuaSrcDiet is now packaged as a single-file Lua script for maximum +convenience. Just drop it in and splice something into your Makefile. + +New source stream and binary chunk equivalence checking minimizes the +possibility of LuaSrcDiet borking your stuff. + +======================================================================== + +ACKNOWLEDGEMENTS + +Coded using SciTE. Developed mostly under Cygwin with a generic Lua +5.1.4 binary. + +======================================================================== + +FEEDBACK + +Feedback and contributions are welcome. Your name will be acknowledged, +as long as you are willing to comply with COPYRIGHT. If your material is +self-contained, you can retain a copyright notice for that material in +your own name, as long as you use the same Lua 5/MIT-style copyright. + +Enjoy! + +Kein-Hong Man (esq.) +Kuala Lumpur +Malaysia 20120407 diff --git a/scripts/luasrcdiet/README.premake b/scripts/luasrcdiet/README.premake new file mode 100644 index 00000000..859d7621 --- /dev/null +++ b/scripts/luasrcdiet/README.premake @@ -0,0 +1,12 @@ +This is a patched up version of LuaSrcDiet 0.12.1. + +The following changes were implemented: + +* added local variables 'loaded_file_contents' and 'saved_file_contents'. +* overriding load_file() to store the loaded file contents in the + 'loaded_file_contents' variable. +* overriding save_file() to store trimmed down source in the + 'saved_file_contents' variable. + ATTENTION: there are multiple functions of that name! +* commented out the "main functions" (search for this text). +* added function get_slim_luasrc() to return trimmed down code. 
From 4caaa3c783b8db80bafac48ac9f37e854abfd795 Mon Sep 17 00:00:00 2001 From: Oliver Schneider Date: Sun, 20 Feb 2022 23:43:06 +0000 Subject: [PATCH 4/4] Attempting to solve a glitch with VS version selector It appears the true string in modern versions is "Visual Studio Version " not just "Visual Studio ". Still need to figure out which of the VS versions changed it. --- src/actions/vstudio/vs2005_solution.lua | 5 +-- tests/actions/vstudio/sln2005/header.lua | 44 ++++++++++++------------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/actions/vstudio/vs2005_solution.lua b/src/actions/vstudio/vs2005_solution.lua index bbfa6853..07441f1e 100644 --- a/src/actions/vstudio/vs2005_solution.lua +++ b/src/actions/vstudio/vs2005_solution.lua @@ -41,9 +41,10 @@ local action = premake.action.current() _p('Microsoft Visual Studio Solution File, Format Version %d.00', action.vstudio.solutionVersion) if action.vstudio.shortSlnVersion ~= nil then - _p('# Visual Studio %s', action.vstudio.shortSlnVersion) + _p('# Visual Studio Version %s', action.vstudio.shortSlnVersion) + _p('VisualStudioVersion = %s.0.0.0', action.vstudio.shortSlnVersion) else - _p('# Visual Studio %s', _ACTION:sub(3)) + _p('# Visual Studio %s', _ACTION:sub(3)) end end diff --git a/tests/actions/vstudio/sln2005/header.lua b/tests/actions/vstudio/sln2005/header.lua index 61aae752..1b5ae62e 100755 --- a/tests/actions/vstudio/sln2005/header.lua +++ b/tests/actions/vstudio/sln2005/header.lua @@ -77,12 +77,12 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 ]] --[[ -VS 2013 seems to add something like: +VS 2013 seems to add something like (not mandatory): -VisualStudioVersion = 12.0.31101.0 +VisualStudioVersion = 12.0.0.0 MinimumVisualStudioVersion = 10.0.40219.1 -which don't seem to be mandatory, though. +which seems to be used by the Visual Studio Version Selector to launch the correct VS. ]] end @@ -91,15 +91,15 @@ which don't seem to be mandatory, though. 
prepare() test.capture [[ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 - ]] +# Visual Studio Version 14 +VisualStudioVersion = 14.]] --[[ -VS 2015 seems to add something like: +VS 2015 seems to add something like (not mandatory): -VisualStudioVersion = 14.0.23107.0 +VisualStudioVersion = 14.0.0.0 MinimumVisualStudioVersion = 10.0.40219.1 -which don't seem to be mandatory, though. +which seems to be used by the Visual Studio Version Selector to launch the correct VS. ]] end @@ -108,15 +108,15 @@ which don't seem to be mandatory, though. prepare() test.capture [[ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 - ]] +# Visual Studio Version 15 +VisualStudioVersion = 15.]] --[[ -VS 2017 seems to add something like: +VS 2017 seems to add something like (not mandatory): -VisualStudioVersion = 15.0.26228.4 +VisualStudioVersion = 15.0.0.0 MinimumVisualStudioVersion = 10.0.40219.1 -which don't seem to be mandatory, though. +which seems to be used by the Visual Studio Version Selector to launch the correct VS. ]] end @@ -125,15 +125,15 @@ which don't seem to be mandatory, though. prepare() test.capture [[ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 16 - ]] +# Visual Studio Version 16 +VisualStudioVersion = 16.]] --[[ -VS 2019 seems to add something like: +VS 2019 seems to add something like (not mandatory): -VisualStudioVersion = 16.0.29411.108 +VisualStudioVersion = 16.0.0.0 MinimumVisualStudioVersion = 10.0.40219.1 -which don't seem to be mandatory, though. +which seems to be used by the Visual Studio Version Selector to launch the correct VS. ]] end @@ -142,14 +142,14 @@ which don't seem to be mandatory, though. 
prepare() test.capture [[ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 17 - ]] +# Visual Studio Version 17 +VisualStudioVersion = 17.]] --[[ -VS 2022 seems to add something like: +VS 2022 seems to add something like (not mandatory): VisualStudioVersion = 17.0.0.0 MinimumVisualStudioVersion = 10.0.40219.1 -which don't seem to be mandatory, though. +which seems to be used by the Visual Studio Version Selector to launch the correct VS. ]] end