From 4e5ab820f7de548f6f282a31428211aeeea510ee Mon Sep 17 00:00:00 2001
From: Milan Hauth
Date: Thu, 31 Jul 2025 14:29:57 +0200
Subject: [PATCH 01/10] serialize_py: init

---
 serialize_py/codegen_result.py           |   59 +
 serialize_py/kaitai_serialize_codegen.py |  460 +++++
 serialize_py/kaitai_serialize_manual.py  |  410 +++++
 serialize_py/kaitaistruct.py             | 1003 ++++++++++
 serialize_py/kaitaistruct_sqlite3.py     | 2120 ++++++++++++++++++++++
 serialize_py/kaitaistruct_sqlite3.py.sh  |   28 +
 serialize_py/pyvlq.py                    |   43 +
 serialize_py/shell.nix                   |   34 +
 serialize_py/sqlite3.ksy                 |  699 +++++++
 serialize_py/vlq_base128_be.ksy          |   55 +
 serialize_py/vlq_base128_be.py           |  144 ++
 11 files changed, 5055 insertions(+)
 create mode 100644 serialize_py/codegen_result.py
 create mode 100755 serialize_py/kaitai_serialize_codegen.py
 create mode 100755 serialize_py/kaitai_serialize_manual.py
 create mode 100644 serialize_py/kaitaistruct.py
 create mode 100644 serialize_py/kaitaistruct_sqlite3.py
 create mode 100755 serialize_py/kaitaistruct_sqlite3.py.sh
 create mode 100644 serialize_py/pyvlq.py
 create mode 100644 serialize_py/shell.nix
 create mode 100644 serialize_py/sqlite3.ksy
 create mode 100644 serialize_py/vlq_base128_be.ksy
 create mode 100644 serialize_py/vlq_base128_be.py

diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py
new file mode 100644
index 0000000..b317cb5
--- /dev/null
+++ b/serialize_py/codegen_result.py
@@ -0,0 +1,59 @@
+import io
+import kaitaistruct
+import kaitaistruct_sqlite3
+
+root_size = 8192
+
+def get_root(_io=None, check=True):
+    if not _io:
+        _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))
+    root = kaitaistruct_sqlite3.Sqlite3(_io)
+    root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(root._io, root, root._root)
+    header = root.header
+    def init_header(header):
+        header.magic = b'SQLite format 3\x00'
+        header.page_size_raw = 4096 # 0x1000
+        header.write_version = kaitaistruct_sqlite3.Sqlite3.FormatVersion.legacy # 1
+        header.read_version = kaitaistruct_sqlite3.Sqlite3.FormatVersion.legacy # 1
+        header.page_reserved_space_size = 0
+        header.max_payload_fraction = 64 # 0x40
+        header.min_payload_fraction = 32 # 0x20
+        header.leaf_payload_fraction = 32 # 0x20
+        header.file_change_counter = 1
+        header.num_pages = 2
+        header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(root._io, header, header._root)
+        first_freelist_trunk_page = header.first_freelist_trunk_page
+        def init_first_freelist_trunk_page(first_freelist_trunk_page):
+            first_freelist_trunk_page.page_number = 0
+        init_first_freelist_trunk_page(first_freelist_trunk_page)
+        header.num_freelist_pages = 0
+        header.schema_cookie = 1
+        header.schema_format = 4
+        header.default_page_cache_size = 0
+        header.largest_root_page = 0
+        header.text_encoding = 1
+        header.user_version = 0
+        header.is_incremental_vacuum = 0
+        header.application_id = 0
+        header.reserved_header_bytes = 20 * b'\x00'
+        header.version_valid_for = 1
+        header.sqlite_version_number = 3050001 # 0x2e8a11
+    init_header(header)
+    if check:
+        root._check()
+    return root
+
+def get_io():
+    root = get_root()
+    _io = root._io
+    # no.
_write calls _fetch_instances which throws + # root._write(_io) + root._write__seq(_io) + # root._fetch_instances() # this would throw + root._io.write_back_child_streams() + return _io + +def get_bytes(): + _io = get_io() + _io.seek(0) + return _io.read_bytes_full() diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py new file mode 100755 index 0000000..a3ee65f --- /dev/null +++ b/serialize_py/kaitai_serialize_codegen.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python3 + +# codegen: parse from data to code +# https://github.com/kaitai-io/kaitai_struct/issues/1244 + +good_database_path = "test_kaitai.py.good.db" +codegen_database_path = "test_kaitai.py.codegen.db" + +import io +import re +import os +import sys +import shlex +import inspect +import subprocess + +# https://github.com/kaitai-io/kaitai_struct_python_runtime +import kaitaistruct + +import kaitaistruct_sqlite3 +import vlq_base128_be +import pyvlq # https://github.com/osoken/pyvlq/blob/main/src/pyvlq/core.py + +# create a lazy list class +# accessing root.pages[i] will call pages_list.__getitem__(i) +class PagesList: + def __init__(self, root): + self.root = root + def __len__(self): + return self.root.header.num_pages + def __getitem__(self, i): # i is 0-based + root = self.root + header = root.header + if i < 0: # -1 means last page, etc + i = header.num_pages + i + assert ( + 0 <= i and i < header.num_pages + ), f"page index is out of range: {i} is not in (0, {header.num_pages - 1})" + # todo: maybe cache page + # equality test: page_a.page_number == page_b.page_number + # FIXME handle root page: i == 0 + _pos = root._io.pos() + if i == 0: + # The first 100 bytes of the database file comprise the database file header + root._io.seek(100) + else: + root._io.seek(i * header.page_size) + if 1: + # use same _io + _io = root._io + else: + # FIXME this is a waste of memory. we need a slice of root._io (aka child_stream?) + # use a copy of _io + page_size = header.page_size if i > 0 else (header.page_size - 100) + # print(dir(root._io)) + _io = kaitaistruct.KaitaiStream(io.BytesIO(root._io.read_bytes(page_size))) + n = i + 1 # page number + if i == header.idx_lock_byte_page: + page = kaitaistruct_sqlite3.Sqlite3.LockBytePage(n, _io, root, root._root) + elif ( + i >= header.idx_first_ptrmap_page and + i <= header.idx_last_ptrmap_page + ): + page = kaitaistruct_sqlite3.Sqlite3.PtrmapPage(n, _io, root, root._root) + else: + page = kaitaistruct_sqlite3.Sqlite3.BtreePage(n, _io, root, root._root) + # FIXME this fails on i == 0 + # kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 116: + # validation failed: not equal, expected b'SQLite format 3\x00', + # but got b'\r\x00\x00\x00\x01\x0f\xcc\x00\x0f\xcc\x00\x00\x00\x00\x00\x00' + # page._read() + root._io.seek(_pos) + return page + +def print_value(on_kn): + print(f"{on_kn} = ", end="") + try: + val = eval(on_kn) + print(repr(val)) + except Exception as exc: print("error:", exc) + +def get_keys(obj): + # wontfix? 
this should only return "seq" keys, not "instances" keys + keys = dir(obj) + def f(k): + if k[0] == "_": return False + if "A" <= k[0] <= "Z": return False + if k in ("close", "from_bytes", "from_file", "from_io", "pages__to_write"): return False + return True + keys = list(filter(f, keys)) + return keys + +def get_seq(obj): + # TODO upstream: this should be simpler + if not hasattr(obj, "_read"): + return [] + _read = getattr(obj, "_read") + lines, firstlineno = inspect.getsourcelines(_read) + lines.pop(0) # "def _read(self):" + seq = [] + for line in lines: + line = line.rstrip() + # print("line", line) + # builtin types + # self.magic = self._io.read_bytes(16) + m = re.match(r"\s+self\.(\w+) = self\._io\.read_(\w+)\((.*)\)", line) + if m: + # key, _type, args = m.groups() + seq.append(m[1]) + continue + # enum types + # self.read_version = KaitaiStream.resolve_enum(Sqlite3.FormatVersion, self._io.read_u1()) + m = re.match(r"\s+self\.(\w+) = KaitaiStream\.resolve_enum\((\w+)\.(\w+), self\._io\.read_(\w+)\((.*)\)\)", line) + if m: + # key, enum_mod, enum_name, _type, args = m.groups() + seq.append(m[1]) + continue + # list type + m = re.match(r"\s+self\.(\w+) = \[\]", line) + if m: + seq.append(m[1]) + continue + # user-defined types + # self.header = Sqlite3.DatabaseHeader(self._io, self, self._root) + m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) + if m: + # print("m", m.groups()) + # key, mod, member = m.groups() + seq.append(m[1]) + continue + return seq + +def parse_enum_map(lines): + enum_map = dict() + line0 = lines.pop(0) + for line in lines: + line = line.rstrip() + m = re.match(r"\s+(\w+) = (\d+)", line) + if m: + key, val = m.groups() + # TODO are there non-int enum types? + val = int(val) + # enum_map[key] = val + enum_map[val] = key + continue + print(f"FIXME parse_enum_map failed to parse line {line!r}") + return enum_map + +def get_local_key(key, global_names): + num = 1 + local_key = key + while local_key in global_names: + if num == 1: + local_key = f"local_{key}" + else: + local_key = f"local_{key}_{num}" + num += 1 + return local_key + +if not os.path.exists(good_database_path): + args = [ + "sqlite3", + good_database_path, + "create table test (id INTEGER)", + ] + print(">", shlex.join(args)) + subprocess.run(args) + +# create a database parser +root = kaitaistruct_sqlite3.Sqlite3.from_file(good_database_path) + +# patch the internal cache attribute of root.pages +# root._m_pages = PagesList(root) + +root._read() + +# print("root.header.magic", root.header.magic) +# now, this will parse **only** the first page +# fix: 'BtreePage' object has no attribute 'cell_pointers' + +if 1: + # print("root.pages[0] keys:", get_keys(root.pages[0])) + # print("root.pages[0] seq:", get_seq(root.pages[0])) + # FIXME kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 116: validation failed: not equal, + # expected b'SQLite format 3\x00', but got b'\r\x00\x00\x00\x01\x0f\xcc\x00\x0f\xcc\x00\x00\x00\x00\x00\x00' + print("root.pages[0]._read()"); root.pages[0]._read() + # print("root.pages[0] keys:", get_keys(root.pages[0])) + # print("root.pages[0] seq:", get_seq(root.pages[0])) + print_value("root.pages[0]") + # FIXME root.pages[0].page_type = error: 'BtreePage' object has no attribute 'page_type' + print_value("root.pages[0].page_type") + print_value("root.pages[0].num_cells") + print_value("root.pages[0].cell_pointers[0]") + print_value("root.pages[0].cell_pointers[0].ofs_content") +if 1: + # print("root.pages[1] keys:", 
get_keys(root.pages[1])) + # print("root.pages[1] seq:", get_seq(root.pages[1])) + print("root.pages[1]._read()"); root.pages[1]._read() + # print("root.pages[1] keys:", get_keys(root.pages[1])) + print("root.pages[1] seq:", get_seq(root.pages[1])) + print_value("root.pages[1]") + print_value("root.pages[1].page_type") + print_value("root.pages[1].num_cells") + # FIXME error: 'BtreePage' object has no attribute 'cell_pointers' + print_value("root.pages[1].cell_pointers[0]") + print_value("root.pages[1].cell_pointers[0].ofs_content") +# print(cell.content, dir(cell.content)) + +r""" +res = io.StringIO() +on_root = "root" +on = on_root +on_parent = on_root +root_class_name = root.__class__.__name__ +""" + + +r""" +mod = root.__class__.__module__ # "kaitaistruct_sqlite3" +mod = root_class_name # "Sqlite3" +mod = module_map.get(mod, mod) +print(f"{on_root} = {mod}.{root_class_name}()", file=res) +print_value("inspect.getsourcelines(root._read)") +print_value("inspect.getclosurevars(root._read)") +print_value("inspect.unwrap(root._read)") +print_value("inspect.get_annotations(root._read)") +""" + +r''' +lines, firstlineno = inspect.getsourcelines(root._read) +lines.pop(0) # "def _read(self):" +for line in lines: + line = line.rstrip() + print("line", line) + m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) + if m: + print("m", m.groups()) + key, mod, member = m.groups() + mod = module_map.get(mod, mod) + # https://doc.kaitai.io/serialization.html#_user_defined_types + """ + if on == "root": + print(f"{on}.{key} = {mod}.{member}()", file=res) + else: + print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) + """ + print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) +''' + +val = None + +on_root = "root" +on = on_root +on_parent = on_root +root_class_name = root.__class__.__name__ +module_map = { + "Sqlite3": "kaitaistruct_sqlite3", +} +mod = root.__class__.__module__ # "kaitaistruct_sqlite3" +mod = root_class_name # "Sqlite3" +mod = module_map.get(mod, mod) + +def codegen( + obj, + out, + on="root", + on_parent=None, + root=None, + indent_step=4*" ", + indent_level=0, + enum_map_map={}, + module_map={}, + global_names=[], +): + global val # fix print_value + mod = obj.__class__.__module__ + member = obj.__class__.__name__ + is_root = True if on_parent == None else False + if is_root: + root = obj + global_names.append("root") + if 0: + # test + key = "header" + global_names.append(key) + global_names.append(f"local_{key}") + global_names.append(f"local_{key}_2") + global_names.append(mod) + # TODO add imports of dependencies. example: vlq_base128_be for sqlite3 + root_cln = root.__class__.__name__ + ind = indent_level * indent_step + ids = indent_step + if is_root: + print(f"{ind}import io", file=out) + print(f"{ind}import kaitaistruct", file=out) + print(f"{ind}import {mod}", file=out) + # TODO add imports of dependencies. 
example: vlq_base128_be for sqlite3 + # print(f"{ind}# root init", file=out) + print("", file=out) + print(f"{ind}root_size = {root._io._size}", file=out) + print("", file=out) + print(f"{ind}def get_root(_io=None, check=True):", file=out) + print(f"{ind}{ids}if not _io:", file=out) + print(f"{ind}{ids}{ids}_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))", file=out) + print(f"{ind}{ids}{on} = {mod}.{member}(_io)", file=out) + # else: + # print(f"{ind}{ids}# non-root init", file=out) + # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) + for key in get_seq(obj): + # print(f"{ind}{ids}# key {key}", file=out) + val = getattr(obj, key) + """ + print("key", repr(key)) + print("val", repr(val), dir(val)) + print_value("val.__class__.__module__") + print_value("val.__class__.__name__") + """ + # obj.__class__.__module__ == 'builtins' + mod = val.__class__.__module__ + member = val.__class__.__name__ + + # builtin types: int, bytes, ... + if mod == "builtins": + if isinstance(val, int) and val > 10: + print(f"{ind}{ids}{on}.{key} = {val!r} # {hex(val)}", file=out) + continue + if isinstance(val, bytes) and val == len(val) * b"\x00": + # compress null bytes + # TODO partial compression of bytestrings + print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out) + continue + # bytes, ... + print(f"{ind}{ids}{on}.{key} = {val!r}", file=out) + continue + + # enum types + # class FormatVersion(IntEnum): + # if mod != "builtins": + # classtree = inspect.getclasstree(val) + # print("classtree", val, val.__class__, classtree) + # print("sourcelines", val, val.__class__) + lines, firstlineno = inspect.getsourcelines(val.__class__) + # for line in lines: + # print("line", line.rstrip()) + # lines.pop(0) # "def _read(self):" + # print("line0", lines[0].rstrip()) + # class FreelistTrunkPagePointer(ReadWriteKaitaiStruct): + # class FormatVersion(IntEnum): + # m = re.match(r"\s*class (\w+)\((IntEnum)\):", lines[0].rstrip()) + m = re.match(r"\s*class (\w+)\(([A-Z][A-Za-z0-9]*Enum)\):", lines[0].rstrip()) + if m: + enum_name, enum_type = m.groups() + enum_map = enum_map_map.get(enum_name) # read cache + if not enum_map: + enum_map = parse_enum_map(lines) + enum_map_map[enum_name] = enum_map # write cache + enum_key = enum_map.get(val) + # print("# enum type", enum_name, enum_type, file=out) + val_str = str(val) + if val > 10: + val_str += f" = {hex(val)}" + print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{enum_name}.{enum_key} # {val_str}", file=out) + continue + + # TODO handle list types + # m = ... + # if m: + # ... + # continue + + # user-defined types + # https://doc.kaitai.io/serialization.html#_user_defined_types + # print(f"{ind}{ids}{on}.{key} = root.{member}(root._io, {on}, {on}._root)", file=out) # short + print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long + # avoid shadowing global variables + local_key = get_local_key(key, global_names) + print(f"{ind}{ids}{local_key} = {on}.{key}", file=out) + # print(f"{ind}{ids}if 1:", file=out) # no block scope + # print(f"{ind}{ids}if {local_key} := {on}.{key}:", file=out) # no block scope + # TypeError: 'int' object does not support the context manager protocol + # print(f"{ind}{ids}with {on}.{key} as {local_key}:", file=out) # context # no block scope? 
+ # create block scope + # this is required to avoid name collisions between scopes + # https://stackoverflow.com/a/45210833/10440128 + print(f"{ind}{ids}def init_{key}({local_key}):", file=out) # "init_" prefix + # print(f"{ind}{ids}def {key}_init({local_key}):", file=out) # "_init" suffix + # recursion + codegen( + val, + out, + local_key, + on, + root, + indent_step, + (indent_level + 1), + enum_map_map, + module_map, + global_names, + ) + print(f"{ind}{ids}init_{key}({local_key})", file=out) # "init_" prefix + # print(f"{ind}{ids}{key}_init({local_key})", file=out) # "_init" suffix + + # some user-defined types need this + # example: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + # but this breaks other cases... + # kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 20: validation failed: not equal, + # expected b'SQLite format 3\x00', but got b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + # print(f"{ind}{ids}{on}._read()", file=out) + + if is_root: + print(f"{ind}{ids}if check:", file=out) + print(f"{ind}{ids}{ids}{on}._check()", file=out) + print(f"{ind}{ids}return {on}", file=out) + if is_root: + print("", file=out) + print(f"{ind}def get_io():", file=out) + print(f"{ind}{ids}root = get_root()", file=out) + print(f"{ind}{ids}_io = root._io", file=out) + # print(f"{ind}{ids}_io.seek(0)", file=out) + print(f"{ind}{ids}# no. _write calls _fetch_instances which throws", file=out) + print(f"{ind}{ids}# root._write(_io)", file=out) + print(f"{ind}{ids}root._write__seq(_io)", file=out) + print(f"{ind}{ids}# root._fetch_instances() # this would throw", file=out) + print(f"{ind}{ids}root._io.write_back_child_streams()", file=out) + print(f"{ind}{ids}return _io", file=out) + print("", file=out) + print(f"{ind}def get_bytes():", file=out) + print(f"{ind}{ids}_io = get_io()", file=out) + print(f"{ind}{ids}_io.seek(0)", file=out) + print(f"{ind}{ids}return _io.read_bytes_full()", file=out) + +out = io.StringIO() +# rename imports +module_map = { + "Sqlite3": "kaitaistruct_sqlite3", +} +codegen(root, out, module_map=module_map) + +print("codegen result:") +print(out.getvalue()) +with open("codegen_result.py", "w") as f: + f.write(out.getvalue()) + +import codegen_result +codegen_bytes = codegen_result.get_bytes() +if codegen_bytes == len(codegen_bytes) * b"\x00": + raise Exception("codegen_bytes are only null bytes") +with open(codegen_database_path, "wb") as f: + f.write(codegen_bytes) + +# TODO rewrite diff in python +args = [ + "diff", "--color=always", "-u", + "<(", "xxd", codegen_database_path, ")", # red + "<(", "xxd", good_database_path, ")", # green + "|", "head", "-n100", +] +args = ["bash", "-c", " ".join(args)] +print(">", shlex.join(args)) +subprocess.run(args) diff --git a/serialize_py/kaitai_serialize_manual.py b/serialize_py/kaitai_serialize_manual.py new file mode 100755 index 0000000..6253adc --- /dev/null +++ b/serialize_py/kaitai_serialize_manual.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 + +good_database_path = "test_kaitai.py.good.db" +bad_database_path = "test_kaitai.py.bad.db" + +# FIXME vlq_base128_be.VlqBase128Be has no value setter +# expected API: vlq_base128_be.VlqBase128Be.from_value(123) +""" +>>> import vlq_base128_be +>>> i = vlq_base128_be.VlqBase128Be() +>>> i.value = 123 +AttributeError: property 'value' of 'VlqBase128Be' object has no setter +""" + +import io + +# https://github.com/kaitai-io/kaitai_struct_python_runtime +import kaitaistruct + +import kaitaistruct_sqlite3 +import vlq_base128_be 
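+
+# A possible stand-in for the missing from_value() API (a sketch, not part of
+# the runtime; the helper name vlq_from_int is made up): encode the integer
+# as base-128 big-endian by hand and let the generated class re-parse it.
+def vlq_from_int(value):
+    groups = [value & 0x7f]
+    value >>= 7
+    while value:
+        groups.append(value & 0x7f)
+        value >>= 7
+    # continuation bit (0x80) on every group except the last
+    buf = bytes(g | 0x80 for g in groups[:0:-1]) + bytes([groups[0]])
+    v = vlq_base128_be.VlqBase128Be.from_bytes(buf)
+    v._read()  # same workaround as below: populate .groups so .value works
+    return v
+# e.g. vlq_from_int(9728).value == 9728, serialized as b"\xcc\x00"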
+import pyvlq # https://github.com/osoken/pyvlq/blob/main/src/pyvlq/core.py
+
+def set_value(on_kn, v):
+    keys = on_kn.split(".")
+    on = keys.pop(0)
+    o = eval(on)
+    for key in keys[:-1]:
+        o = getattr(o, key)
+    key = keys.pop()
+    print(f"setting {on_kn} = {v!r}")
+    setattr(o, key, v)
+    return v
+
+def print_keys(on):
+    o = eval(on)
+    ignore = ("close", "from_bytes", "from_file", "from_io")
+    f = lambda a: "a" <= a[0] <= "z" and a not in ignore
+    print(f"{on} keys:", " ".join(filter(f, dir(o))))
+
+def print_value(on_kn):
+    print(f"{on_kn} = ", end="")
+    try:
+        val = eval(on_kn)
+        print(repr(val))
+    except Exception as exc: print("error:", exc)
+
+def check(on):
+    o = eval(on)
+    try: print(f"checking {on}: ", end=""); o._check(); print("ok")
+    except Exception as exc: print("error:", exc)
+
+def write(on):
+    o = eval(on)
+    _io.seek(0) # fix: _write__seq does not seek before writing
+    try: print(f"writing {on}: ", end=""); o._write(_io); print("ok")
+    except Exception as exc: print("error:", exc)
+
+page_size = 4096
+
+num_pages = 2
+
+# _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(num_pages * page_size)))
+# FIXME add extra space for database header
+_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(num_pages * page_size + 100)))
+
+if 0:
+    root = kaitaistruct_sqlite3.Sqlite3(_io)
+else:
+    class PatchedSqlite3(kaitaistruct_sqlite3.Sqlite3):
+        def _write(self, io=None):
+            self._write__seq(io)
+            # self._fetch_instances() # this would throw
+            self._io.write_back_child_streams()
+    root = PatchedSqlite3(_io)
+
+# print("kaitaistruct_sqlite3", dir(kaitaistruct_sqlite3))
+# print("root", dir(root))
+
+root.header = root.DatabaseHeader(root._io, root, root._root)
+
+print_keys("root")
+print_keys("root.header")
+
+print_value("root.header.len_page")
+# AttributeError: 'Sqlite3' object has no attribute 'len_page_mod'
+
+# u2 @ 0x10 = 16
+set_value("root.header.page_size_raw", page_size if page_size < 65536 else 1)
+print_value("root.header.len_page") # derived from root.header.page_size_raw
+print_keys("root.header")
+
+write("root")
+# AttributeError: 'Sqlite3' object has no attribute 'magic'
+
+# 00000000: 5351 4c69 7465 2066 6f72 6d61 7420 3300  SQLite format 3.
+set_value("root.header.magic", b"SQLite format 3\0") # 16 bytes @ 0x0
+# set_value("root.header.magic", b"some_magic_strr\0") # test
+print_keys("root.header")
+
+write("root")
+# AttributeError: 'Sqlite3' object has no attribute 'read_version'. Did you mean: 'write_version'?
+
+set_value("root.header.read_version", 0)
+print_keys("root.header")
+
+write("root")
+# AttributeError: 'Sqlite3' object has no attribute 'reserved_space'
+
+# 00000000: 5351 4c69 7465 2066 6f72 6d61 7420 3300  SQLite format 3.
+set_value("root.header.magic", b"SQLite format 3\0") # 16 bytes @ 0x0
+
+# 00000010: 1000 0101 0040 2020 0000 0001 0000 0002  .....@  ........
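+
+# Sanity check of the encoding used below (illustrative, not required): the
+# page size is a big-endian u2 at offset 16 and must be a power of two in
+# 512..32768, with the special value 1 standing in for 65536.
+raw_page_size = page_size if page_size < 65536 else 1
+assert raw_page_size == 1 or (512 <= raw_page_size <= 32768 and raw_page_size & (raw_page_size - 1) == 0)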
+set_value("root.header.page_size_raw", page_size if page_size < 65536 else 1) # u2 @ 0x10: 4096 == 0x1000
+set_value("root.header.write_version", 1) # u1 @ 0x12
+set_value("root.header.read_version", 1) # u1 @ 0x13
+set_value("root.header.page_reserved_space_size", 0) # u1 @ 0x14
+set_value("root.header.max_payload_fraction", 64) # u1 @ 0x15: 64 == 0x40
+set_value("root.header.min_payload_fraction", 32) # u1 @ 0x16: 32 == 0x20
+set_value("root.header.leaf_payload_fraction", 32) # u1 @ 0x17: 32 == 0x20
+set_value("root.header.file_change_counter", 1) # u4 @ 0x18
+set_value("root.header.num_pages", num_pages) # u4 @ 0x1c
+
+# 00000020: 0000 0000 0000 0000 0000 0001 0000 0004  ................
+# set_value("root.header.first_freelist_trunk_page", 0) # u4 @ 0x20
+root.header.first_freelist_trunk_page = root.FreelistTrunkPagePointer() # u4 @ 0x20
+root.header.first_freelist_trunk_page.page_number = 0
+set_value("root.header.num_freelist_pages", 0) # u4 @ 0x24
+set_value("root.header.schema_cookie", 1) # u4 @ 0x28
+set_value("root.header.schema_format", 4) # u4 @ 0x2c
+
+# 00000030: 0000 0000 0000 0000 0000 0001 0000 0000  ................
+set_value("root.header.default_page_cache_size", 0) # u4 @ 0x30
+set_value("root.header.largest_root_page", 0) # u4 @ 0x34
+set_value("root.header.text_encoding", 1) # utf8 # u4 @ 0x38
+set_value("root.header.user_version", 0) # u4 @ 0x3c
+
+# 00000040: 0000 0000 0000 0000 0000 0000 0000 0000  ................
+set_value("root.header.is_incremental_vacuum", 0) # u4 @ 0x40
+set_value("root.header.application_id", 0) # u4 @ 0x44
+
+# 00000050: 0000 0000 0000 0000 0000 0000 0000 0001  ................
+set_value("root.header.reserved_header_bytes", b"\x00" * 20) # 20 bytes @ 0x48
+set_value("root.header.version_valid_for", 1) # u4 @ 0x5c
+
+#           0011 2233 4455 6677 8899 aabb ccdd eeff
+# 00000060: 002e 8a11 0d00 0000 010f cc00 0fcc 0000  ................
+#                                           ^^^^ cell_ptr.ofs_content
+#                                           ^^^^ page.cell_pointers[0] @ 0x6c
+#                                        ^^ page.num_frag_free_bytes
+#                                 ^^ ^^ page.ofs_cell_content_area_raw
+#                            ^^ ^^ page.num_cells
+set_value("root.header.sqlite_version_number", 0x002e8a11) # u4 @ 0x60
+# page = set_value("root.header.root_page", root.BtreePage()) # old
+page_number = 1
+root.pages = []
+# page = set_value("root.header.root_page", root.BtreePage(page_number))
+page = root.BtreePage(page_number)
+root.pages.append(page)
+page.database_header = root.header
+page._root = root
+check("root")
+# FIXME Check failed: root_page, expected: , actual: None
+page.page_type = 0x0d # cell_table_leaf # u1 @ 0x64
+page.first_freeblock = 0 # u2 @ 0x65
+page.num_cells = 1 # u2 @ 0x67
+# TODO seek to page.ofs_cells and write cells
+# page.ofs_cells = 0x0fcc # u2
+page.ofs_cell_content_area_raw = 0x0fcc # u2 @ 0x69
+page.num_frag_free_bytes = 0 # u1 @ 0x6b
+# page.right_ptr = 0 # only for (page_type == 2 or page_type == 5) # u4
+# print("dir root:", dir(root))
+page.cell_pointers = [] # 0x6c
+
+cell_ptr = root.CellPointer()
+cell_ptr.ofs_content = 0x0fcc # u2 @ 0x6c
+cell_ptr._parent = page
+cell_ptr._root = root
+page.cell_pointers.append(cell_ptr)
+
+if 0:
+
+    # cell = root.CellTableLeaf() # old
+    cell = root.TableLeafCell()
+    cell._root = root
+
+    # no!
+ # TODO write cell to cell_ptr.ofs_content + ############### page.cell_pointers.append(cell) + + root._io.seek(cell_ptr.ofs_content) + root._io.write_bytes(b"\xbe\xef") # beef + + """ + table_leaf_cell: + doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages' + seq: + - id: payload_size + type: vlq_base128_be + doc: | + total number of bytes of payload, + including any overflow + - id: row_id + type: vlq_base128_be + doc: | + integer key, a.k.a. "rowid" + - id: payload + type: + switch-on: '(payload_size.value > _root.header.table_max_overflow_payload_size ? 1 : 0)' + cases: + 0: record + 1: overflow_record(payload_size.value, _root.header.table_max_overflow_payload_size) + """ + + # NOTE cell.len_payload is derived from len(content_str) + # cell.payload_size = vlq_base128_be.VlqBase128Be.from_bytes(b"\x0f") # 0x0f @ 0x69 + cell.payload_size = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(15)) # 0x0f @ 0x69 + cell.payload_size._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("cell.payload_size.value") + + # cell.row_id = vlq_base128_be.VlqBase128Be.from_bytes(b"\xcc\x00") # 0xcc00 @ 0x6a + cell.row_id = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(9728)) # 0xcc00 @ 0x6a + cell.row_id._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("cell.row_id.value") + + # FIXME move payload to page.ofs_cells = 0x0fc0 + + record_offset = page.ofs_cell_content_area_raw # 0x0fcc + root._io.write_bytes(b"\xbe\xef") # FIXME no effect? + root._io.seek(record_offset) # FIXME no effect? + root._io.write_bytes(b"\xbe\xef") # FIXME no effect? + print_value("root._io.tell()") + print_value("dir(root._io)") + + # payload = cell.payload = root.CellPayload() # old + record = cell.payload = root.Record() + # record = cell.payload = root.OverflowRecord(payload_size=12, overflow_payload_size_max=34) # ? + + # TODO what? 
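+    # Why 2 is the value that satisfies _check() below (per the record format,
+    # https://sqlite.org/fileformat2.html#record_format): the header-size
+    # varint counts the whole header *including itself*, which is also why the
+    # .ksy reads the remaining header with `size: header_size.value - 1`.
+    # Illustrative arithmetic for this one-column record:
+    size_varint_bytes = 1   # the header-size varint itself fits in one byte
+    serial_type_bytes = 1   # one vlq byte for the single column's serial type
+    record_header_size = size_varint_bytes + serial_type_bytes  # == 2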
+ record_header_size = 0 # ValueError: negative count + record_header_size = 1 # kaitaistruct.ConsistencyError: Check failed: entries, expected: 0, actual: 0 + record_header_size = 2 + record_header_size = 3 # kaitaistruct.ConsistencyError: Check failed: entries, expected: 0, actual: 1 + record_header_size = 4 # kaitaistruct.ConsistencyError: Check failed: entries, expected: 0, actual: 2 + record_header_size = 2 + + """ + record: + doc-ref: 'https://sqlite.org/fileformat2.html#record_format' + seq: + - id: header_size + type: vlq_base128_be + - id: header + type: record_header + size: header_size.value - 1 + - id: values + type: value(header.value_types[_index]) + repeat: expr + repeat-expr: header.value_types.size + record_header: + seq: + - id: value_types + type: serial_type + repeat: eos + """ + + record.header_size = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(record_header_size)) + record.header_size._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("record.header_size.value") # 2 + + record.header = root.RecordHeader() + record.header.value_types = [] + + record.values = [] + + content_str = "asdf" # len: 4 + content_str = "CREATE TABLE test (id INTEGER)" # len: 30 + + serial_type = root.SerialType() + serial_type_raw_value = 12 + len(content_str) * 2 + 1 # odd = str, even = bytes + serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(serial_type_raw_value)) + serial_type.raw_value._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("serial_type.raw_value.value") + record.header.value_types.append(serial_type) + + """ + record.record_header_size = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(record_header_size)) + record.record_header_size._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("record.record_header_size.value") + """ + record.column_contents = [] + + if 0: + serials = record.column_serials = root.Serials() + serials.entries = [] + + entry = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(0)) + entry._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + serials.entries.append(entry) + + """ + serials: + seq: + - id: entries + type: vlq_base128_be + repeat: eos + """ + + serial = root.Serial() + """ + is_string: + value: 'code.value >= 13 and (code.value % 2 == 1)' + """ + + serial_code_value = 12 + len(content_str) * 2 + 1 # odd = str, even = bytes + serial.code = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(serial_code_value)) + serial.code._read() # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + print_value("serial.code.value") + + content = root.ColumnContent(ser=serial) + # size: serial_type.len_content + content.as_str = content_str + record.column_contents.append(content) + + content = root.StringUtf8(len_value=len(content_str)) + content.value = content_str + + record.values.append(content) + +print("root", dir(root)) # debug + +print_keys("root") + +# FIXME +""" +checking s: ok +writing s: error: requested invalid -1 amount of bytes +""" + +check("root") +# Check failed: root_page, expected: , actual: None + +write("root") +# 'Sqlite3' object has no attribute 'reserved_space' + +# TODO seek to page.ofs_cells = 0x0fc0 and write cells +# 00000fb0: 0000 0000 0000 0000 0000 0000 0000 0000 ................ +# -00000fc0: 0000 0000 0000 0000 0000 0000 0000 0000 ................ +# +00000fc0: 0000 0000 0000 0000 0000 0000 3201 0617 ............2... 
+# ^ 0x0fc0 + 12 = 0x0fcc = 4044 + + + +_io.seek(0) # fix: _write__seq does not seek before writing + +# no. _write calls _fetch_instances which throws +# print(f"writing root"); root._write(_io) +print(f"writing root"); root._write__seq(_io); root._io.write_back_child_streams() + +print("writing done") + +_io.seek(0) +_bytes = _io.read_bytes(num_pages * page_size) +# print(_bytes) + +print("writing", bad_database_path) +with open(bad_database_path, "wb") as f: + f.write(_bytes) + +import sqlite3 +con = sqlite3.connect(bad_database_path) +try: con.execute("select * from sqlite_schema") +except Exception as exc: print(exc) +# sqlite3.DatabaseError: file is not a database + +import subprocess +import shlex +import os + +if not os.path.exists(good_database_path): + args = [ + "sqlite3", + good_database_path, + "create table test (id INTEGER)", + ] + print(">", shlex.join(args)) + subprocess.run(args) + +# TODO rewrite diff in python +args = [ + "diff", "--color=always", "-u", + "<(", "xxd", bad_database_path, ")", # red + "<(", "xxd", good_database_path, ")", # green + # "|", "head", "-n20", + "|", "head", "-n100", +] +args = ["bash", "-c", " ".join(args)] +print(">", shlex.join(args)) +subprocess.run(args) diff --git a/serialize_py/kaitaistruct.py b/serialize_py/kaitaistruct.py new file mode 100644 index 0000000..e774dba --- /dev/null +++ b/serialize_py/kaitaistruct.py @@ -0,0 +1,1003 @@ +import itertools +import sys +import struct +from io import open, BytesIO, SEEK_CUR, SEEK_END # noqa +import warnings + +PY2 = sys.version_info[0] == 2 + +# Kaitai Struct runtime version, in the format defined by PEP 440. +# Used by our setup.cfg to set the version number in +# packaging/distribution metadata. +# Also used in Python code generated by older ksc versions (0.7 through 0.9) +# to check that the imported runtime is compatible with the generated code. +# Since ksc 0.10, the compatibility check instead uses the API_VERSION constant, +# so that the version string does not need to be parsed at runtime +# (see https://github.com/kaitai-io/kaitai_struct/issues/804). +__version__ = '0.11.dev1' + +# Kaitai Struct runtime API version, as a tuple of ints. +# Used in generated Python code (since ksc 0.10) to check that the imported +# runtime is compatible with the generated code. 
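+# Generated modules guard on this tuple; kaitaistruct_sqlite3.py in this patch
+# does exactly that:
+#
+#   if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 11):
+#       raise Exception("Incompatible Kaitai Struct Python API: ...")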
+API_VERSION = (0, 11) + +# pylint: disable=invalid-name,missing-docstring,too-many-public-methods +# pylint: disable=useless-object-inheritance,super-with-arguments,consider-using-f-string + + +class KaitaiStruct(object): + def __init__(self, stream): + self._io = stream + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def close(self): + self._io.close() + + @classmethod + def from_file(cls, filename): + f = open(filename, 'rb') + try: + return cls(KaitaiStream(f)) + except Exception: + # close file descriptor, then reraise the exception + f.close() + raise + + @classmethod + def from_bytes(cls, buf): + return cls(KaitaiStream(BytesIO(buf))) + + @classmethod + def from_io(cls, io): + return cls(KaitaiStream(io)) + + +class ReadWriteKaitaiStruct(KaitaiStruct): + def _fetch_instances(self): + raise NotImplementedError() + + def _write(self, io=None): + self._write__seq(io) + self._fetch_instances() + self._io.write_back_child_streams() + + def _write__seq(self, io): + if io is not None: + self._io = io + + +class KaitaiStream(object): + def __init__(self, io): + self._io = io + self.align_to_byte() + self.bits_le = False + self.bits_write_mode = False + + self.write_back_handler = None + self.child_streams = [] + + try: + self._size = self.size() + # IOError is for Python 2 (IOError also exists in Python 3, but it has + # become just an alias for OSError). + # + # Although I haven't actually seen a bare ValueError raised in this case + # in practice, chances are some implementation may be doing it (see + # for reference: + # "Also, implementations may raise a ValueError (or + # UnsupportedOperation) when operations they do not support are + # called."). And I've seen ValueError raised at least in Python 2 when + # calling read() on an unreadable stream. + except (OSError, IOError, ValueError): + # tell() or seek() failed - we have a non-seekable stream (which is + # fine for reading, but writing will fail, see + # _write_bytes_not_aligned()) + pass + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def close(self): + try: + if self.bits_write_mode: + self.write_align_to_byte() + else: + self.align_to_byte() + finally: + self._io.close() + + # region Stream positioning + + def is_eof(self): + if not self.bits_write_mode and self.bits_left > 0: + return False + + # NB: previously, we first tried if self._io.read(1) did in fact read 1 + # byte from the stream (and then seeked 1 byte back if so), but given + # that is_eof() may be called from both read and write contexts, it's + # more universal not to use read() at all. See also + # . + return self._io.tell() >= self.size() + + def seek(self, n): + if n < 0: + raise InvalidArgumentError("cannot seek to invalid position %d" % (n,)) + + if self.bits_write_mode: + self.write_align_to_byte() + else: + self.align_to_byte() + + self._io.seek(n) + + def pos(self): + return self._io.tell() + (1 if self.bits_write_mode and self.bits_left > 0 else 0) + + def size(self): + # Python has no internal File object API function to get + # current file / StringIO size, thus we use the following + # trick. 
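+        # (tell() -> seek(0, SEEK_END) -> seek() back; the constructor also
+        # runs this once and keeps the result in self._size for the write
+        # path, see _ensure_bytes_left_to_write().)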
+ io = self._io + # Remember our current position + cur_pos = io.tell() + # Seek to the end of the stream and remember the full length + full_size = io.seek(0, SEEK_END) + + if full_size is None: + # In Python 2, the seek() method of 'file' objects (created by the + # built-in open() function) has no return value, so we have to call + # tell() ourselves to get the new absolute position - see + # . + # + # In Python 3, seek() methods of all + # streams return the new + # position already, so this won't be needed once we drop support for + # Python 2. + full_size = io.tell() + + # Seek back to the current position + io.seek(cur_pos) + return full_size + + # endregion + + # region Structs for numeric types + + packer_s1 = struct.Struct('b') + packer_s2be = struct.Struct('>h') + packer_s4be = struct.Struct('>i') + packer_s8be = struct.Struct('>q') + packer_s2le = struct.Struct('H') + packer_u4be = struct.Struct('>I') + packer_u8be = struct.Struct('>Q') + packer_u2le = struct.Struct('f') + packer_f8be = struct.Struct('>d') + packer_f4le = struct.Struct(' 0: + # 1 bit => 1 byte + # 8 bits => 1 byte + # 9 bits => 2 bytes + bytes_needed = ((bits_needed - 1) // 8) + 1 # `ceil(bits_needed / 8)` + buf = self._read_bytes_not_aligned(bytes_needed) + if PY2: + buf = bytearray(buf) + for byte in buf: + res = res << 8 | byte + + new_bits = res + res = res >> self.bits_left | self.bits << bits_needed + self.bits = new_bits # will be masked at the end of the function + else: + res = self.bits >> -bits_needed # shift unneeded bits out + + mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 + self.bits &= mask + + return res + + def read_bits_int(self, n): + """Deprecated and no longer used as of KSC 0.9. It is only available + for backwards compatibility and will be removed in the future. + + KSC 0.9 and later uses `read_bits_int_be()` instead. + """ + warnings.warn( + "read_bits_int() is deprecated since 0.9, use read_bits_int_be() instead", + DeprecationWarning, + stacklevel=2, + ) + return self.read_bits_int_be(n) + + def read_bits_int_le(self, n): + self.bits_write_mode = False + + res = 0 + bits_needed = n - self.bits_left + + if bits_needed > 0: + # 1 bit => 1 byte + # 8 bits => 1 byte + # 9 bits => 2 bytes + bytes_needed = ((bits_needed - 1) // 8) + 1 # `ceil(bits_needed / 8)` + buf = self._read_bytes_not_aligned(bytes_needed) + if PY2: + buf = bytearray(buf) + for i, byte in enumerate(buf): + res |= byte << (i * 8) + + new_bits = res >> bits_needed + res = res << self.bits_left | self.bits + self.bits = new_bits + else: + res = self.bits + self.bits >>= n + + self.bits_left = -bits_needed % 8 + + mask = (1 << n) - 1 # no problem with this in Python (arbitrary precision integers) + res &= mask + return res + + # endregion + + # region Byte arrays + + def read_bytes(self, n): + self.align_to_byte() + return self._read_bytes_not_aligned(n) + + def _read_bytes_not_aligned(self, n): + if n < 0: + raise InvalidArgumentError( + "requested invalid %d amount of bytes" % + (n,) + ) + + is_satisfiable = True + # When a large number of bytes is requested, try to check first + # that there is indeed enough data left in the stream. + # This avoids reading large amounts of data only to notice afterwards + # that it's not long enough. For smaller amounts of data, it's faster to + # first read the data unconditionally and check the length afterwards. 
+ if ( + n >= 8*1024*1024 # = 8 MiB + # in Python 2, there is a common error ['file' object has no + # attribute 'seekable'], so we need to make sure that seekable() exists + and callable(getattr(self._io, 'seekable', None)) + and self._io.seekable() + ): + num_bytes_available = self.size() - self.pos() + is_satisfiable = (n <= num_bytes_available) + + if is_satisfiable: + r = self._io.read(n) + num_bytes_available = len(r) + is_satisfiable = (n <= num_bytes_available) + + if not is_satisfiable: + # noinspection PyUnboundLocalVariable + raise EndOfStreamError( + "requested %d bytes, but only %d bytes available" % + (n, num_bytes_available), + n, num_bytes_available + ) + + # noinspection PyUnboundLocalVariable + return r + + def read_bytes_full(self): + self.align_to_byte() + return self._io.read() + + def read_bytes_term(self, term, include_term, consume_term, eos_error): + self.align_to_byte() + term_byte = KaitaiStream.byte_from_int(term) + r = bytearray() + while True: + c = self._io.read(1) + if not c: + if eos_error: + raise NoTerminatorFoundError(term_byte, 0) + + return bytes(r) + + if c == term_byte: + if include_term: + r += c + if not consume_term: + self._io.seek(-1, SEEK_CUR) + return bytes(r) + + r += c + + def read_bytes_term_multi(self, term, include_term, consume_term, eos_error): + self.align_to_byte() + unit_size = len(term) + r = bytearray() + while True: + c = self._io.read(unit_size) + if len(c) < unit_size: + if eos_error: + raise NoTerminatorFoundError(term, len(c)) + + r += c + return bytes(r) + + if c == term: + if include_term: + r += c + if not consume_term: + self._io.seek(-unit_size, SEEK_CUR) + return bytes(r) + + r += c + + def ensure_fixed_contents(self, expected): + """Deprecated and no longer used as of KSC 0.9. It is only available + for backwards compatibility and will be removed in the future. + + KSC 0.9 and later explicitly raises `ValidationNotEqualError` from an + `if` statement instead. 
+ """ + warnings.warn( + "ensure_fixed_contents() is deprecated since 0.9, explicitly raise " + "ValidationNotEqualError from an `if` statement instead", + DeprecationWarning, + stacklevel=2, + ) + actual = self._io.read(len(expected)) + if actual != expected: + raise Exception( + "unexpected fixed contents: got %r, was waiting for %r" % + (actual, expected) + ) + return actual + + @staticmethod + def bytes_strip_right(data, pad_byte): + return data.rstrip(KaitaiStream.byte_from_int(pad_byte)) + + @staticmethod + def bytes_terminate(data, term, include_term): + term_index = KaitaiStream.byte_array_index_of(data, term) + if term_index == -1: + return data[:] + return data[:term_index + (1 if include_term else 0)] + + @staticmethod + def bytes_terminate_multi(data, term, include_term): + unit_size = len(term) + search_index = data.find(term) + while True: + if search_index == -1: + return data[:] + mod = search_index % unit_size + if mod == 0: + return data[:search_index + (unit_size if include_term else 0)] + search_index = data.find(term, search_index + (unit_size - mod)) + + # endregion + + # endregion + + # region Writing + + def _ensure_bytes_left_to_write(self, n, pos): + try: + full_size = self._size + except AttributeError: + raise ValueError("writing to non-seekable streams is not supported") + + num_bytes_left = full_size - pos + if n > num_bytes_left: + raise EndOfStreamError( + "requested to write %d bytes, but only %d bytes left in the stream" % + (n, num_bytes_left), + n, num_bytes_left + ) + + # region Integer numbers + + # region Signed + + def write_s1(self, v): + self.write_bytes(KaitaiStream.packer_s1.pack(v)) + + # region Big-endian + + def write_s2be(self, v): + self.write_bytes(KaitaiStream.packer_s2be.pack(v)) + + def write_s4be(self, v): + self.write_bytes(KaitaiStream.packer_s4be.pack(v)) + + def write_s8be(self, v): + self.write_bytes(KaitaiStream.packer_s8be.pack(v)) + + # endregion + + # region Little-endian + + def write_s2le(self, v): + self.write_bytes(KaitaiStream.packer_s2le.pack(v)) + + def write_s4le(self, v): + self.write_bytes(KaitaiStream.packer_s4le.pack(v)) + + def write_s8le(self, v): + self.write_bytes(KaitaiStream.packer_s8le.pack(v)) + + # endregion + + # endregion + + # region Unsigned + + def write_u1(self, v): + self.write_bytes(KaitaiStream.packer_u1.pack(v)) + + # region Big-endian + + def write_u2be(self, v): + self.write_bytes(KaitaiStream.packer_u2be.pack(v)) + + def write_u4be(self, v): + self.write_bytes(KaitaiStream.packer_u4be.pack(v)) + + def write_u8be(self, v): + self.write_bytes(KaitaiStream.packer_u8be.pack(v)) + + # endregion + + # region Little-endian + + def write_u2le(self, v): + self.write_bytes(KaitaiStream.packer_u2le.pack(v)) + + def write_u4le(self, v): + self.write_bytes(KaitaiStream.packer_u4le.pack(v)) + + def write_u8le(self, v): + self.write_bytes(KaitaiStream.packer_u8le.pack(v)) + + # endregion + + # endregion + + # endregion + + # region Floating point numbers + + # region Big-endian + + def write_f4be(self, v): + self.write_bytes(KaitaiStream.packer_f4be.pack(v)) + + def write_f8be(self, v): + self.write_bytes(KaitaiStream.packer_f8be.pack(v)) + + # endregion + + # region Little-endian + + def write_f4le(self, v): + self.write_bytes(KaitaiStream.packer_f4le.pack(v)) + + def write_f8le(self, v): + self.write_bytes(KaitaiStream.packer_f8le.pack(v)) + + # endregion + + # endregion + + # region Unaligned bit values + + def write_align_to_byte(self): + if self.bits_left > 0: + b = self.bits + if not self.bits_le: + b 
<<= 8 - self.bits_left + + # We clear the `bits_left` and `bits` fields using align_to_byte() + # before writing the byte in the stream so that it happens even in + # case the write fails. The reason is that if the write fails, it + # would likely be a permanent issue that's not going to resolve + # itself when retrying the operation with the same stream state, and + # since seek() calls write_align_to_byte() at the beginning too, you + # wouldn't be even able to seek anywhere without getting the same + # exception again. So the stream could be in a broken state, + # throwing the same exception over and over again even though you've + # already processed it and you'd like to move on. And the only way + # to get rid of it would be to call align_to_byte() externally + # (given how it's currently implemented), but that's really just a + # coincidence - that's a method intended for reading (not writing) + # and it should never be necessary to call it from the outside (it's + # more like an internal method now). + # + # So it seems more reasonable to deliver the exception once and let + # the user application process it, but otherwise clear the bit + # buffer to make the stream ready for further operations and to + # avoid repeatedly delivering an exception for one past failed + # operation. The rationale behind this is that it's not really a + # failure of the "align to byte" operation, but the writing of some + # bits to the stream that was requested earlier. + self.align_to_byte() + self._write_bytes_not_aligned(KaitaiStream.byte_from_int(b)) + + def write_bits_int_be(self, n, val): + self.bits_le = False + self.bits_write_mode = True + + mask = (1 << n) - 1 # no problem with this in Python (arbitrary precision integers) + val &= mask + + bits_to_write = self.bits_left + n + bytes_needed = ((bits_to_write - 1) // 8) + 1 # `ceil(bits_to_write / 8)` + + # Unlike self._io.tell(), pos() respects the `bits_left` field (it + # returns the stream position as if it were already aligned on a byte + # boundary), which ensures that we report the same numbers of bytes here + # as read_bits_int_*() methods would. + self._ensure_bytes_left_to_write(bytes_needed - (1 if self.bits_left > 0 else 0), self.pos()) + + bytes_to_write = bits_to_write // 8 + self.bits_left = bits_to_write % 8 + + if bytes_to_write > 0: + buf = bytearray(bytes_to_write) + + mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 + new_bits = val & mask + val = val >> self.bits_left | self.bits << (n - self.bits_left) + self.bits = new_bits + + for i in range(bytes_to_write - 1, -1, -1): + buf[i] = val & 0xff + val >>= 8 + self._write_bytes_not_aligned(buf) + else: + self.bits = self.bits << n | val + + def write_bits_int_le(self, n, val): + self.bits_le = True + self.bits_write_mode = True + + bits_to_write = self.bits_left + n + bytes_needed = ((bits_to_write - 1) // 8) + 1 # `ceil(bits_to_write / 8)` + + # Unlike self._io.tell(), pos() respects the `bits_left` field (it + # returns the stream position as if it were already aligned on a byte + # boundary), which ensures that we report the same numbers of bytes here + # as read_bits_int_*() methods would. 
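+        # (For example, write_bits_int_le(4, 0xA) followed by
+        # write_bits_int_le(4, 0xB) emits the single byte 0xBA: little-endian
+        # bit order fills the low bits of each byte first.)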
+ self._ensure_bytes_left_to_write(bytes_needed - (1 if self.bits_left > 0 else 0), self.pos()) + + bytes_to_write = bits_to_write // 8 + old_bits_left = self.bits_left + self.bits_left = bits_to_write % 8 + + if bytes_to_write > 0: + buf = bytearray(bytes_to_write) + + new_bits = val >> (n - self.bits_left) # no problem with this in Python (arbitrary precision integers) + val = val << old_bits_left | self.bits + self.bits = new_bits + + for i in range(bytes_to_write): + buf[i] = val & 0xff + val >>= 8 + self._write_bytes_not_aligned(buf) + else: + self.bits |= val << old_bits_left + + mask = (1 << self.bits_left) - 1 # `bits_left` is in range 0..7 + self.bits &= mask + + # endregion + + # region Byte arrays + + def write_bytes(self, buf): + self.write_align_to_byte() + self._write_bytes_not_aligned(buf) + + def _write_bytes_not_aligned(self, buf): + n = len(buf) + self._ensure_bytes_left_to_write(n, self._io.tell()) + self._io.write(buf) + + def write_bytes_limit(self, buf, size, term, pad_byte): + n = len(buf) + # Strictly speaking, this assertion is redundant because it is already + # done in the corresponding _check() method in the generated code, but + # it seems to make sense to include it here anyway so that this method + # itself does something reasonable for every set of arguments. + # + # However, it should never be `false` when operated correctly (and in + # this case, assigning inconsistent values to fields of a KS-generated + # object is considered correct operation if the user application calls + # the corresponding _check(), which we know would raise an error and + # thus the code should not reach _write() and this method at all). So + # it's by design that this throws AssertionError, not any specific + # error, because it's not intended to be caught in user applications, + # but avoided by calling all _check() methods correctly. 
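+        # (For example, write_bytes_limit(b"ab", 5, 0, 0x2e) writes the two
+        # data bytes, one 0x00 terminator byte, then two 0x2e pad bytes.)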
+ assert n <= size, "writing %d bytes, but %d bytes were given" % (size, n) + + self.write_bytes(buf) + if n < size: + self.write_u1(term) + self.write_bytes(KaitaiStream.byte_from_int(pad_byte) * (size - n - 1)) + + # endregion + + # endregion + + # region Byte array processing + + @staticmethod + def process_xor_one(data, key): + if PY2: + return bytes(bytearray(v ^ key for v in bytearray(data))) + + return bytes(v ^ key for v in data) + + @staticmethod + def process_xor_many(data, key): + if PY2: + return bytes(bytearray(a ^ b for a, b in zip(bytearray(data), itertools.cycle(bytearray(key))))) + + return bytes(a ^ b for a, b in zip(data, itertools.cycle(key))) + + @staticmethod + def process_rotate_left(data, amount, group_size): + if group_size != 1: + raise NotImplementedError( + "unable to rotate group of %d bytes yet" % + (group_size,) + ) + + anti_amount = -amount % (group_size * 8) + + r = bytearray(data) + for i, byte in enumerate(r): + r[i] = (byte << amount) & 0xff | (byte >> anti_amount) + return bytes(r) + + # endregion + + # region Misc runtime operations + + @staticmethod + def int_from_byte(v): + return ord(v) if PY2 else v + + @staticmethod + def byte_from_int(i): + return chr(i) if PY2 else bytes((i,)) + + @staticmethod + def byte_array_index(data, i): + return KaitaiStream.int_from_byte(data[i]) + + @staticmethod + def byte_array_min(b): + return KaitaiStream.int_from_byte(min(b)) + + @staticmethod + def byte_array_max(b): + return KaitaiStream.int_from_byte(max(b)) + + @staticmethod + def byte_array_index_of(data, b): + return data.find(KaitaiStream.byte_from_int(b)) + + @staticmethod + def resolve_enum(enum_obj, value): + """Resolves value using enum: if the value is not found in the map, + we'll just use literal value per se. Works around problem with Python + enums throwing an exception when encountering unknown value. + """ + try: + return enum_obj(value) + except ValueError: + return value + + # endregion + + def to_byte_array(self): + pos = self.pos() + self.seek(0) + r = self.read_bytes_full() + self.seek(pos) + return r + + class WriteBackHandler(object): + def __init__(self, pos, handler): + self.pos = pos + self.handler = handler + + def write_back(self, parent): + parent.seek(self.pos) + self.handler(parent) + + def add_child_stream(self, child): + self.child_streams.append(child) + + def write_back_child_streams(self, parent=None): + _pos = self.pos() + for child in self.child_streams: + child.write_back_child_streams(self) + + # NOTE: Python 2 doesn't have list.clear() so it can't be used, see + # https://docs.python.org/3.11/library/stdtypes.html#mutable-sequence-types + # ("New in version 3.3: clear() and copy() methods.") + del self.child_streams[:] + self.seek(_pos) + if parent is not None: + self._write_back(parent) + + def _write_back(self, parent): + self.write_back_handler.write_back(parent) + + +class KaitaiStructError(Exception): + """Common ancestor for all errors originating from correct Kaitai Struct + usage (i.e. errors that indicate a problem with user input, not errors + indicating incorrect usage that are not meant to be caught but fixed in the + application code). Use this exception type in the `except` clause if you + want to handle all parse errors and serialization errors. + + If available, the `src_path` attribute will contain the KSY source path + pointing to the element where the error occurred. If it is not available, + `src_path` will be `None`. 
+ """ + def __init__(self, msg, src_path): + super(KaitaiStructError, self).__init__(("" if src_path is None else src_path + ": ") + msg) + self.src_path = src_path + + +class InvalidArgumentError(KaitaiStructError, ValueError): + """Indicates that an invalid argument value was received (like `ValueError`), + but used in places where this might indicate invalid user input and + therefore represents a parse error or serialization error. + """ + def __init__(self, msg): + super(InvalidArgumentError, self).__init__(msg, None) + + +class EndOfStreamError(KaitaiStructError, EOFError): + """Read or write beyond end of stream. Provides the `bytes_needed` (number + of bytes requested to read or write) and `bytes_available` (number of bytes + remaining in the stream) attributes. + """ + def __init__(self, msg, bytes_needed, bytes_available): + super(EndOfStreamError, self).__init__(msg, None) + self.bytes_needed = bytes_needed + self.bytes_available = bytes_available + + +class NoTerminatorFoundError(EndOfStreamError): + """Special type of `EndOfStreamError` that occurs when end of stream is + reached before the required terminator is found. If you want to tolerate a + missing terminator, you can specify `eos-error: false` in the KSY + specification, in which case the end of stream will be considered a valid + end of field and this error will no longer be raised. + + The `term` attribute contains a `bytes` object with the searched terminator. + """ + def __init__(self, term, bytes_available): + super(NoTerminatorFoundError, self).__init__("end of stream reached, but no terminator %r found" % (term,), len(term), bytes_available) + self.term = term + + +class UndecidedEndiannessError(KaitaiStructError): + """Error that occurs when default endianness should be decided with + switch, but nothing matches (although using endianness expression + implies that there should be some positive result). + """ + def __init__(self, src_path): + super(UndecidedEndiannessError, self).__init__("unable to decide on endianness for a type", src_path) + + +class ValidationFailedError(KaitaiStructError): + """Common ancestor for all validation failures. Stores pointer to + KaitaiStream IO object which was involved in an error. + """ + def __init__(self, msg, io, src_path): + super(ValidationFailedError, self).__init__(("" if io is None else "at pos %d: " % (io.pos(),)) + "validation failed: " + msg, src_path) + self.io = io + + +class ValidationNotEqualError(ValidationFailedError): + """Signals validation failure: we required "actual" value to be equal to + "expected", but it turned out that it's not. + """ + def __init__(self, expected, actual, io, src_path): + super(ValidationNotEqualError, self).__init__("not equal, expected %s, but got %s" % (repr(expected), repr(actual)), io, src_path) + self.expected = expected + self.actual = actual + + +class ValidationLessThanError(ValidationFailedError): + """Signals validation failure: we required "actual" value to be + greater than or equal to "min", but it turned out that it's not. + """ + def __init__(self, min_bound, actual, io, src_path): + super(ValidationLessThanError, self).__init__("not in range, min %s, but got %s" % (repr(min_bound), repr(actual)), io, src_path) + self.min = min_bound + self.actual = actual + + +class ValidationGreaterThanError(ValidationFailedError): + """Signals validation failure: we required "actual" value to be + less than or equal to "max", but it turned out that it's not. 
+ """ + def __init__(self, max_bound, actual, io, src_path): + super(ValidationGreaterThanError, self).__init__("not in range, max %s, but got %s" % (repr(max_bound), repr(actual)), io, src_path) + self.max = max_bound + self.actual = actual + + +class ValidationNotAnyOfError(ValidationFailedError): + """Signals validation failure: we required "actual" value to be + from the list, but it turned out that it's not. + """ + def __init__(self, actual, io, src_path): + super(ValidationNotAnyOfError, self).__init__("not any of the list, got %s" % (repr(actual)), io, src_path) + self.actual = actual + + +class ValidationNotInEnumError(ValidationFailedError): + """Signals validation failure: we required "actual" value to be in + the enum, but it turned out that it's not. + """ + def __init__(self, actual, io, src_path): + super(ValidationNotInEnumError, self).__init__("not in the enum, got %s" % (repr(actual)), io, src_path) + self.actual = actual + + +class ValidationExprError(ValidationFailedError): + """Signals validation failure: we required "actual" value to match + the expression, but it turned out that it doesn't. + """ + def __init__(self, actual, io, src_path): + super(ValidationExprError, self).__init__("not matching the expression, got %s" % (repr(actual)), io, src_path) + self.actual = actual + + +class ConsistencyError(Exception): + def __init__(self, attr_id, actual, expected): + super(ConsistencyError, self).__init__("Check failed: %s, expected: %s, actual: %s" % (attr_id, repr(expected), repr(actual))) + self.id = attr_id + self.actual = actual + self.expected = expected diff --git a/serialize_py/kaitaistruct_sqlite3.py b/serialize_py/kaitaistruct_sqlite3.py new file mode 100644 index 0000000..12cf991 --- /dev/null +++ b/serialize_py/kaitaistruct_sqlite3.py @@ -0,0 +1,2120 @@ +# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild +# type: ignore + +import kaitaistruct +from kaitaistruct import ReadWriteKaitaiStruct, KaitaiStream, BytesIO +from enum import IntEnum + + +if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 11): + raise Exception("Incompatible Kaitai Struct Python API: 0.11 or later is required, but you have %s" % (kaitaistruct.__version__)) + +import vlq_base128_be +class Sqlite3(ReadWriteKaitaiStruct): + """SQLite3 is a popular serverless SQL engine, implemented as a library + to be used within other applications. It keeps its databases as + regular disk files. + + Every database file is segmented into pages. First page (starting at + the very beginning) is special: it contains a file-global header + which specifies some data relevant to proper parsing (i.e. format + versions, size of page, etc). After the header, normal contents of + the first page follow. + + Each page would be of some type (btree, ptrmap, lock_byte, or free), + and generally, they would be reached via the links starting from the + first page. The first page is always a btree page for the implicitly + defined `sqlite_schema` table. + + This works well when parsing small database files. To parse large + database files, see the documentation for /instances/pages. + + Further documentation: + + - https://www.sqlite.org/arch.html + - https://medium.com/the-polyglot-programmer/what-would-sqlite-look-like-if-written-in-rust-part-3-edd2eefda473 + - https://cstack.github.io/db_tutorial/parts/part7.html + + Original sources: + + - https://github.com/sqlite/sqlite/blob/master/src/btree.h + - https://github.com/sqlite/sqlite/blob/master/src/btree.c + + .. 
seealso:: + Source - https://www.sqlite.org/fileformat2.html + """ + + class FormatVersion(IntEnum): + legacy = 1 + wal = 2 + + class BtreePageType(IntEnum): + index_interior_page = 2 + table_interior_page = 5 + index_leaf_page = 10 + table_leaf_page = 13 + + class PtrmapPageType(IntEnum): + root_page = 1 + free_page = 2 + overflow1 = 3 + overflow2 = 4 + btree = 5 + + class Serial(IntEnum): + nil = 0 + two_comp_8 = 1 + two_comp_16 = 2 + two_comp_24 = 3 + two_comp_32 = 4 + two_comp_48 = 5 + two_comp_64 = 6 + ieee754_64 = 7 + integer_0 = 8 + integer_1 = 9 + internal_1 = 10 + internal_2 = 11 + blob = 12 + string_utf8 = 13 + string_utf16_le = 14 + string_utf16_be = 15 + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + self._should_write_pages = False + self.pages__to_write = True + + def _read(self): + self.header = Sqlite3.DatabaseHeader(self._io, self, self._root) + self.header._read() + + + def _fetch_instances(self): + pass + self.header._fetch_instances() + _ = self.pages + for i in range(len(self._m_pages)): + pass + _on = (0 if (i == self.header.idx_lock_byte_page) else (1 if (((i >= self.header.idx_first_ptrmap_page)) and ((i <= self.header.idx_last_ptrmap_page))) else 2)) + if _on == 0: + pass + self.pages[i]._fetch_instances() + elif _on == 1: + pass + self.pages[i]._fetch_instances() + elif _on == 2: + pass + self.pages[i]._fetch_instances() + else: + pass + + + + def _write__seq(self, io=None): + super(Sqlite3, self)._write__seq(io) + self._should_write_pages = self.pages__to_write + self.header._write__seq(self._io) + + + def _check(self): + pass + if self.header._root != self._root: + raise kaitaistruct.ConsistencyError(u"header", self.header._root, self._root) + if self.header._parent != self: + raise kaitaistruct.ConsistencyError(u"header", self.header._parent, self) + + class LockBytePage(ReadWriteKaitaiStruct): + """The lock-byte page is the single page of the database file that contains the bytes at offsets between + 1073741824 and 1073742335, inclusive. A database file that is less than or equal to 1073741824 bytes + in size contains no lock-byte page. A database file larger than 1073741824 contains exactly one + lock-byte page. + The lock-byte page is set aside for use by the operating-system specific VFS implementation in implementing + the database file locking primitives. SQLite does not use the lock-byte page. 
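+
+        A sketch of how the page index is derived, mirroring the
+        `idx_lock_byte_page` instance of `database_header`:
+
+        ```py
+        page_size = 4096
+        idx_lock_byte_page = 1073741824 // page_size  # 0-based page index
+        # a file with num_pages <= idx_lock_byte_page has no lock-byte page
+        ```
+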
+ """ + def __init__(self, page_number, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.page_number = page_number + + def _read(self): + pass + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.LockBytePage, self)._write__seq(io) + + + def _check(self): + pass + + + class FreelistTrunkPagePointer(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self._should_write_page = False + self.page__to_write = True + + def _read(self): + self.page_number = self._io.read_u4be() + + + def _fetch_instances(self): + pass + if (self.page_number != 0): + pass + _ = self.page + self.page._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.FreelistTrunkPagePointer, self)._write__seq(io) + self._should_write_page = self.page__to_write + self._io.write_u4be(self.page_number) + + + def _check(self): + pass + + @property + def page(self): + if self._should_write_page: + self._write_page() + if hasattr(self, '_m_page'): + return self._m_page + + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + self._raw__m_page = io.read_bytes(self._root.header.page_size) + _io__raw__m_page = KaitaiStream(BytesIO(self._raw__m_page)) + self._m_page = Sqlite3.FreelistTrunkPage(_io__raw__m_page, self, self._root) + self._m_page._read() + io.seek(_pos) + + return getattr(self, '_m_page', None) + + @page.setter + def page(self, v): + self._m_page = v + + def _write_page(self): + self._should_write_page = False + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + _io__raw__m_page = KaitaiStream(BytesIO(bytearray(self._root.header.page_size))) + io.add_child_stream(_io__raw__m_page) + _pos2 = io.pos() + io.seek(io.pos() + (self._root.header.page_size)) + def handler(parent, _io__raw__m_page=_io__raw__m_page): + self._raw__m_page = _io__raw__m_page.to_byte_array() + if (len(self._raw__m_page) != self._root.header.page_size): + raise kaitaistruct.ConsistencyError(u"raw(page)", len(self._raw__m_page), self._root.header.page_size) + parent.write_bytes(self._raw__m_page) + _io__raw__m_page.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.page._write__seq(_io__raw__m_page) + io.seek(_pos) + + + + def _check_page(self): + pass + if (self.page_number != 0): + pass + if self.page._root != self._root: + raise kaitaistruct.ConsistencyError(u"page", self.page._root, self._root) + if self.page._parent != self: + raise kaitaistruct.ConsistencyError(u"page", self.page._parent, self) + + + + class BtreePage(ReadWriteKaitaiStruct): + def __init__(self, page_number, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.page_number = page_number + self._should_write_cell_content_area = False + self.cell_content_area__to_write = True + self._should_write_reserved_space = False + self.reserved_space__to_write = True + + def _read(self): + self.page_type = KaitaiStream.resolve_enum(Sqlite3.BtreePageType, self._io.read_u1()) + self.first_freeblock = self._io.read_u2be() + self.num_cells = self._io.read_u2be() + self.ofs_cell_content_area_raw = self._io.read_u2be() + self.num_frag_free_bytes = self._io.read_u1() + if (((self.page_type == Sqlite3.BtreePageType.index_interior_page)) 
or ((self.page_type == Sqlite3.BtreePageType.table_interior_page))) : + pass + self.right_ptr = Sqlite3.BtreePagePointer(self._io, self, self._root) + self.right_ptr._read() + + self.cell_pointers = [] + for i in range(self.num_cells): + _t_cell_pointers = Sqlite3.CellPointer(self._io, self, self._root) + _t_cell_pointers._read() + self.cell_pointers.append(_t_cell_pointers) + + + + def _fetch_instances(self): + pass + if (((self.page_type == Sqlite3.BtreePageType.index_interior_page)) or ((self.page_type == Sqlite3.BtreePageType.table_interior_page))) : + pass + self.right_ptr._fetch_instances() + + for i in range(len(self.cell_pointers)): + pass + self.cell_pointers[i]._fetch_instances() + + _ = self.cell_content_area + if (self._root.header.page_reserved_space_size != 0): + pass + _ = self.reserved_space + + + + def _write__seq(self, io=None): + super(Sqlite3.BtreePage, self)._write__seq(io) + self._should_write_cell_content_area = self.cell_content_area__to_write + self._should_write_reserved_space = self.reserved_space__to_write + self._io.write_u1(int(self.page_type)) + self._io.write_u2be(self.first_freeblock) + self._io.write_u2be(self.num_cells) + self._io.write_u2be(self.ofs_cell_content_area_raw) + self._io.write_u1(self.num_frag_free_bytes) + if (((self.page_type == Sqlite3.BtreePageType.index_interior_page)) or ((self.page_type == Sqlite3.BtreePageType.table_interior_page))) : + pass + self.right_ptr._write__seq(self._io) + + for i in range(len(self.cell_pointers)): + pass + self.cell_pointers[i]._write__seq(self._io) + + + + def _check(self): + pass + if (((self.page_type == Sqlite3.BtreePageType.index_interior_page)) or ((self.page_type == Sqlite3.BtreePageType.table_interior_page))) : + pass + if self.right_ptr._root != self._root: + raise kaitaistruct.ConsistencyError(u"right_ptr", self.right_ptr._root, self._root) + if self.right_ptr._parent != self: + raise kaitaistruct.ConsistencyError(u"right_ptr", self.right_ptr._parent, self) + + if (len(self.cell_pointers) != self.num_cells): + raise kaitaistruct.ConsistencyError(u"cell_pointers", len(self.cell_pointers), self.num_cells) + for i in range(len(self.cell_pointers)): + pass + if self.cell_pointers[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"cell_pointers", self.cell_pointers[i]._root, self._root) + if self.cell_pointers[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"cell_pointers", self.cell_pointers[i]._parent, self) + + + @property + def ofs_cell_content_area(self): + if hasattr(self, '_m_ofs_cell_content_area'): + return self._m_ofs_cell_content_area + + self._m_ofs_cell_content_area = (65536 if (self.ofs_cell_content_area_raw == 0) else self.ofs_cell_content_area_raw) + return getattr(self, '_m_ofs_cell_content_area', None) + + def _invalidate_ofs_cell_content_area(self): + del self._m_ofs_cell_content_area + @property + def cell_content_area(self): + if self._should_write_cell_content_area: + self._write_cell_content_area() + if hasattr(self, '_m_cell_content_area'): + return self._m_cell_content_area + + _pos = self._io.pos() + self._io.seek(self.ofs_cell_content_area) + self._m_cell_content_area = self._io.read_bytes((self._root.header.usable_size - self.ofs_cell_content_area)) + self._io.seek(_pos) + return getattr(self, '_m_cell_content_area', None) + + @cell_content_area.setter + def cell_content_area(self, v): + self._m_cell_content_area = v + + def _write_cell_content_area(self): + self._should_write_cell_content_area = False + _pos = self._io.pos() + 
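+            # note: ofs_cell_content_area_raw == 0 encodes offset 65536 (only
+            # possible with 64 KiB pages); the ofs_cell_content_area instance
+            # above performs that mapping before this seek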
self._io.seek(self.ofs_cell_content_area) + self._io.write_bytes(self.cell_content_area) + self._io.seek(_pos) + + + def _check_cell_content_area(self): + pass + if (len(self.cell_content_area) != (self._root.header.usable_size - self.ofs_cell_content_area)): + raise kaitaistruct.ConsistencyError(u"cell_content_area", len(self.cell_content_area), (self._root.header.usable_size - self.ofs_cell_content_area)) + + @property + def reserved_space(self): + if self._should_write_reserved_space: + self._write_reserved_space() + if hasattr(self, '_m_reserved_space'): + return self._m_reserved_space + + if (self._root.header.page_reserved_space_size != 0): + pass + _pos = self._io.pos() + self._io.seek((self._root.header.page_size - self._root.header.page_reserved_space_size)) + self._m_reserved_space = self._io.read_bytes_full() + self._io.seek(_pos) + + return getattr(self, '_m_reserved_space', None) + + @reserved_space.setter + def reserved_space(self, v): + self._m_reserved_space = v + + def _write_reserved_space(self): + self._should_write_reserved_space = False + if (self._root.header.page_reserved_space_size != 0): + pass + _pos = self._io.pos() + self._io.seek((self._root.header.page_size - self._root.header.page_reserved_space_size)) + self._io.write_bytes(self.reserved_space) + if not self._io.is_eof(): + raise kaitaistruct.ConsistencyError(u"reserved_space", self._io.size() - self._io.pos(), 0) + self._io.seek(_pos) + + + + def _check_reserved_space(self): + pass + if (self._root.header.page_reserved_space_size != 0): + pass + + + + class BtreePagePointer(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self._should_write_page = False + self.page__to_write = True + + def _read(self): + self.page_number = self._io.read_u4be() + + + def _fetch_instances(self): + pass + if (self.page_number != 0): + pass + _ = self.page + self.page._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.BtreePagePointer, self)._write__seq(io) + self._should_write_page = self.page__to_write + self._io.write_u4be(self.page_number) + + + def _check(self): + pass + + @property + def page(self): + if self._should_write_page: + self._write_page() + if hasattr(self, '_m_page'): + return self._m_page + + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + self._raw__m_page = io.read_bytes(self._root.header.page_size) + _io__raw__m_page = KaitaiStream(BytesIO(self._raw__m_page)) + self._m_page = Sqlite3.BtreePage(self.page_number, _io__raw__m_page, self, self._root) + self._m_page._read() + io.seek(_pos) + + return getattr(self, '_m_page', None) + + @page.setter + def page(self, v): + self._m_page = v + + def _write_page(self): + self._should_write_page = False + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + _io__raw__m_page = KaitaiStream(BytesIO(bytearray(self._root.header.page_size))) + io.add_child_stream(_io__raw__m_page) + _pos2 = io.pos() + io.seek(io.pos() + (self._root.header.page_size)) + def handler(parent, _io__raw__m_page=_io__raw__m_page): + self._raw__m_page = _io__raw__m_page.to_byte_array() + if (len(self._raw__m_page) != self._root.header.page_size): + raise kaitaistruct.ConsistencyError(u"raw(page)", len(self._raw__m_page), self._root.header.page_size) + parent.write_bytes(self._raw__m_page) + 
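+                # note: `handler` is not invoked here; write_back_child_streams()
+                # later seeks the parent stream to _pos2 and calls it to copy the
+                # serialized page bytes into place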
_io__raw__m_page.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.page._write__seq(_io__raw__m_page) + io.seek(_pos) + + + + def _check_page(self): + pass + if (self.page_number != 0): + pass + if self.page._root != self._root: + raise kaitaistruct.ConsistencyError(u"page", self.page._root, self._root) + if self.page._parent != self: + raise kaitaistruct.ConsistencyError(u"page", self.page._parent, self) + if (self.page.page_number != self.page_number): + raise kaitaistruct.ConsistencyError(u"page", self.page.page_number, self.page_number) + + + + class OverflowPage(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.next_page_number = Sqlite3.OverflowPagePointer(self._io, self, self._root) + self.next_page_number._read() + self.content = self._io.read_bytes((self._root.header.page_size - 4)) + + + def _fetch_instances(self): + pass + self.next_page_number._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.OverflowPage, self)._write__seq(io) + self.next_page_number._write__seq(self._io) + self._io.write_bytes(self.content) + + + def _check(self): + pass + if self.next_page_number._root != self._root: + raise kaitaistruct.ConsistencyError(u"next_page_number", self.next_page_number._root, self._root) + if self.next_page_number._parent != self: + raise kaitaistruct.ConsistencyError(u"next_page_number", self.next_page_number._parent, self) + if (len(self.content) != (self._root.header.page_size - 4)): + raise kaitaistruct.ConsistencyError(u"content", len(self.content), (self._root.header.page_size - 4)) + + + class Int0(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + pass + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.Int0, self)._write__seq(io) + + + def _check(self): + pass + + + class OverflowRecord(ReadWriteKaitaiStruct): + def __init__(self, payload_size, overflow_payload_size_max, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.payload_size = payload_size + self.overflow_payload_size_max = overflow_payload_size_max + + def _read(self): + self.inline_payload = self._io.read_bytes((self.inline_payload_size if (self.inline_payload_size <= self.overflow_payload_size_max) else self._root.header.overflow_min_payload_size)) + self.overflow_page_number = Sqlite3.OverflowPagePointer(self._io, self, self._root) + self.overflow_page_number._read() + + + def _fetch_instances(self): + pass + self.overflow_page_number._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.OverflowRecord, self)._write__seq(io) + self._io.write_bytes(self.inline_payload) + self.overflow_page_number._write__seq(self._io) + + + def _check(self): + pass + if (len(self.inline_payload) != (self.inline_payload_size if (self.inline_payload_size <= self.overflow_payload_size_max) else self._root.header.overflow_min_payload_size)): + raise kaitaistruct.ConsistencyError(u"inline_payload", len(self.inline_payload), (self.inline_payload_size if (self.inline_payload_size <= self.overflow_payload_size_max) else self._root.header.overflow_min_payload_size)) + if self.overflow_page_number._root != self._root: + raise kaitaistruct.ConsistencyError(u"overflow_page_number", self.overflow_page_number._root, self._root) + if 
self.overflow_page_number._parent != self: + raise kaitaistruct.ConsistencyError(u"overflow_page_number", self.overflow_page_number._parent, self) + + @property + def inline_payload_size(self): + if hasattr(self, '_m_inline_payload_size'): + return self._m_inline_payload_size + + self._m_inline_payload_size = (self._root.header.overflow_min_payload_size + ((self.payload_size - self._root.header.overflow_min_payload_size) % (self._root.header.usable_size - 4))) + return getattr(self, '_m_inline_payload_size', None) + + def _invalidate_inline_payload_size(self): + del self._m_inline_payload_size + + class FreelistTrunkPage(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.next_page = Sqlite3.FreelistTrunkPagePointer(self._io, self, self._root) + self.next_page._read() + self.num_free_pages = self._io.read_u4be() + self.free_pages = [] + for i in range(self.num_free_pages): + self.free_pages.append(self._io.read_u4be()) + + + + def _fetch_instances(self): + pass + self.next_page._fetch_instances() + for i in range(len(self.free_pages)): + pass + + + + def _write__seq(self, io=None): + super(Sqlite3.FreelistTrunkPage, self)._write__seq(io) + self.next_page._write__seq(self._io) + self._io.write_u4be(self.num_free_pages) + for i in range(len(self.free_pages)): + pass + self._io.write_u4be(self.free_pages[i]) + + + + def _check(self): + pass + if self.next_page._root != self._root: + raise kaitaistruct.ConsistencyError(u"next_page", self.next_page._root, self._root) + if self.next_page._parent != self: + raise kaitaistruct.ConsistencyError(u"next_page", self.next_page._parent, self) + if (len(self.free_pages) != self.num_free_pages): + raise kaitaistruct.ConsistencyError(u"free_pages", len(self.free_pages), self.num_free_pages) + for i in range(len(self.free_pages)): + pass + + + + class StringUtf16Be(ReadWriteKaitaiStruct): + def __init__(self, len_value, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.len_value = len_value + + def _read(self): + self.value = (self._io.read_bytes(self.len_value)).decode("UTF-16BE") + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.StringUtf16Be, self)._write__seq(io) + self._io.write_bytes((self.value).encode(u"UTF-16BE")) + + + def _check(self): + pass + if (len((self.value).encode(u"UTF-16BE")) != self.len_value): + raise kaitaistruct.ConsistencyError(u"value", len((self.value).encode(u"UTF-16BE")), self.len_value) + + + class NullValue(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + pass + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.NullValue, self)._write__seq(io) + + + def _check(self): + pass + + + class Int1(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + pass + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.Int1, self)._write__seq(io) + + + def _check(self): + pass + + + class OverflowPagePointer(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self._should_write_page = False + self.page__to_write = True + 
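+        # note: `page` below is a lazy instance, like the other page pointers:
+        # reading seeks the root stream to (page_number - 1) * page_size, while
+        # writing allocates a page-sized child stream that is flushed later by
+        # write_back_child_streams()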
+ def _read(self): + self.page_number = self._io.read_u4be() + + + def _fetch_instances(self): + pass + if (self.page_number != 0): + pass + _ = self.page + self.page._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.OverflowPagePointer, self)._write__seq(io) + self._should_write_page = self.page__to_write + self._io.write_u4be(self.page_number) + + + def _check(self): + pass + + @property + def page(self): + if self._should_write_page: + self._write_page() + if hasattr(self, '_m_page'): + return self._m_page + + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + self._raw__m_page = io.read_bytes(self._root.header.page_size) + _io__raw__m_page = KaitaiStream(BytesIO(self._raw__m_page)) + self._m_page = Sqlite3.OverflowPage(_io__raw__m_page, self, self._root) + self._m_page._read() + io.seek(_pos) + + return getattr(self, '_m_page', None) + + @page.setter + def page(self, v): + self._m_page = v + + def _write_page(self): + self._should_write_page = False + if (self.page_number != 0): + pass + io = self._root._io + _pos = io.pos() + io.seek(((self.page_number - 1) * self._root.header.page_size)) + _io__raw__m_page = KaitaiStream(BytesIO(bytearray(self._root.header.page_size))) + io.add_child_stream(_io__raw__m_page) + _pos2 = io.pos() + io.seek(io.pos() + (self._root.header.page_size)) + def handler(parent, _io__raw__m_page=_io__raw__m_page): + self._raw__m_page = _io__raw__m_page.to_byte_array() + if (len(self._raw__m_page) != self._root.header.page_size): + raise kaitaistruct.ConsistencyError(u"raw(page)", len(self._raw__m_page), self._root.header.page_size) + parent.write_bytes(self._raw__m_page) + _io__raw__m_page.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.page._write__seq(_io__raw__m_page) + io.seek(_pos) + + + + def _check_page(self): + pass + if (self.page_number != 0): + pass + if self.page._root != self._root: + raise kaitaistruct.ConsistencyError(u"page", self.page._root, self._root) + if self.page._parent != self: + raise kaitaistruct.ConsistencyError(u"page", self.page._parent, self) + + + + class SerialType(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.raw_value = vlq_base128_be.VlqBase128Be(self._io) + self.raw_value._read() + + + def _fetch_instances(self): + pass + self.raw_value._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.SerialType, self)._write__seq(io) + self.raw_value._write__seq(self._io) + + + def _check(self): + pass + + @property + def type(self): + if hasattr(self, '_m_type'): + return self._m_type + + self._m_type = KaitaiStream.resolve_enum(Sqlite3.Serial, ((12 if ((self.raw_value.value % 2) == 0) else ((13 + self._root.header.text_encoding) - 1)) if (self.raw_value.value >= 12) else self.raw_value.value)) + return getattr(self, '_m_type', None) + + def _invalidate_type(self): + del self._m_type + @property + def len_blob_string(self): + if hasattr(self, '_m_len_blob_string'): + return self._m_len_blob_string + + if (self.raw_value.value >= 12): + pass + self._m_len_blob_string = ((self.raw_value.value - 12) // 2 if ((self.raw_value.value % 2) == 0) else (self.raw_value.value - 13) // 2) + + return getattr(self, '_m_len_blob_string', None) + + def _invalidate_len_blob_string(self): + del self._m_len_blob_string + + class IndexLeafCell(ReadWriteKaitaiStruct): + """ + 
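+        An index b-tree leaf cell: a varint `payload_size`, followed by either
+        an inline record or, when the payload spills, an overflow record. A
+        sketch of the spill rule applied in `_read`, using the
+        `index_max_overflow_payload_size` instance of `database_header`:
+
+        ```py
+        x = ((usable_size - 12) * 64) // 255 - 23  # index_max_overflow_payload_size
+        spills_to_overflow = payload_size > x
+        ```
+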
.. seealso:: + Source - https://www.sqlite.org/fileformat2.html#b_tree_pages + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.payload_size = vlq_base128_be.VlqBase128Be(self._io) + self.payload_size._read() + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload = Sqlite3.Record(self._io, self, self._root) + self.payload._read() + elif _on == 1: + pass + self.payload = Sqlite3.OverflowRecord(self.payload_size.value, self._root.header.index_max_overflow_payload_size, self._io, self, self._root) + self.payload._read() + + + def _fetch_instances(self): + pass + self.payload_size._fetch_instances() + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._fetch_instances() + elif _on == 1: + pass + self.payload._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.IndexLeafCell, self)._write__seq(io) + self.payload_size._write__seq(self._io) + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._write__seq(self._io) + elif _on == 1: + pass + self.payload._write__seq(self._io) + + + def _check(self): + pass + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + elif _on == 1: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + if (self.payload.payload_size != self.payload_size.value): + raise kaitaistruct.ConsistencyError(u"payload", self.payload.payload_size, self.payload_size.value) + if (self.payload.overflow_payload_size_max != self._root.header.index_max_overflow_payload_size): + raise kaitaistruct.ConsistencyError(u"payload", self.payload.overflow_payload_size_max, self._root.header.index_max_overflow_payload_size) + + + class PointerMapPage(ReadWriteKaitaiStruct): + """A ptrmap page contains back-links from child to parent. + See also: /types/pointer_map_entry. + + Pointer map pages (or "ptrmap pages") + are extra pages inserted into the database + to make the operation of auto_vacuum and + incremental_vacuum modes more efficient. + + Ptrmap pages must exist in any database file + which has a non-zero largest root b-tree page value + in db.header.largest_root_page. + + If db.header.largest_root_page is zero, + then the database must not contain ptrmap pages. + + The first ptrmap page (on page 2) + will contain back pointer information + for pages 3 through J+2, inclusive. + + The second pointer map page will be on page J+3 + and that ptrmap page will provide back pointer information + for pages J+4 through 2*J+3 inclusive. + + And so forth for the entire database file. 
+ + ```py + page_size = 512 + page_reserved_space_size = 0 + U = usable_size = page_size - page_reserved_space_size # 512 + J = pointer_map_page_entries_max = usable_size // 5 # 102 + + # pointer map 1 + X = 1 + N = pointer_map_page_number_raw = ((X - 1) * J) + 1 + X # 2 + A = first_linked_page_number = N + 1 # 3 + Z = last_linked_page_number = N + J # 104 = J + 2 + + # pointer map 2 + X = 2 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 105 = J + 3 + A = first_linked_page_number = N + 1 # 106 = J + 4 + Z = last_linked_page_number = N + J # 207 = (2 * J) + 3 + + # pointer map 3 + X = 3 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 208 + A = first_linked_page_number = N + 1 # 209 + Z = last_linked_page_number = N + J # 310 + + # pointer map 4 + X = 4 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 311 + A = first_linked_page_number = N + 1 # 312 + Z = last_linked_page_number = N + J # 413 + ``` + + actual pointer_map_page_number: + + ```py + NR = pointer_map_page_number_raw = ((X - 1) * J) + 1 + X # 2 + N = pointer_map_page_number = ( + pointer_map_page_number_raw + if (pointer_map_page_number_raw != lock_byte_page_number) + else (pointer_map_page_number_raw + 1) + ) + ``` + + .. seealso:: + Source - https://www.sqlite.org/fileformat2.html#pointer_map_or_ptrmap_pages + """ + def __init__(self, pointer_map_page_number, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.pointer_map_page_number = pointer_map_page_number + + def _read(self): + self.entries = [] + for i in range(self.num_entries): + _t_entries = Sqlite3.PointerMapEntry(self._io, self, self._root) + _t_entries._read() + self.entries.append(_t_entries) + + + + def _fetch_instances(self): + pass + for i in range(len(self.entries)): + pass + self.entries[i]._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.PointerMapPage, self)._write__seq(io) + for i in range(len(self.entries)): + pass + self.entries[i]._write__seq(self._io) + + + + def _check(self): + pass + if (len(self.entries) != self.num_entries): + raise kaitaistruct.ConsistencyError(u"entries", len(self.entries), self.num_entries) + for i in range(len(self.entries)): + pass + if self.entries[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"entries", self.entries[i]._root, self._root) + if self.entries[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"entries", self.entries[i]._parent, self) + + + @property + def last_linked_page_number(self): + if hasattr(self, '_m_last_linked_page_number'): + return self._m_last_linked_page_number + + self._m_last_linked_page_number = (self.last_linked_page_number_max if (self.last_linked_page_number_max <= self._root.header.num_pages) else self._root.header.num_pages) + return getattr(self, '_m_last_linked_page_number', None) + + def _invalidate_last_linked_page_number(self): + del self._m_last_linked_page_number + @property + def last_linked_page_number_max(self): + if hasattr(self, '_m_last_linked_page_number_max'): + return self._m_last_linked_page_number_max + + self._m_last_linked_page_number_max = (self.pointer_map_page_number + self.pointer_map_page_entries_max) + return getattr(self, '_m_last_linked_page_number_max', None) + + def _invalidate_last_linked_page_number_max(self): + del self._m_last_linked_page_number_max + @property + def first_linked_page_number(self): + if hasattr(self, '_m_first_linked_page_number'): + return self._m_first_linked_page_number + + self._m_first_linked_page_number = 
(self.pointer_map_page_number + 1) + return getattr(self, '_m_first_linked_page_number', None) + + def _invalidate_first_linked_page_number(self): + del self._m_first_linked_page_number + @property + def num_entries(self): + if hasattr(self, '_m_num_entries'): + return self._m_num_entries + + self._m_num_entries = ((self.last_linked_page_number - self.first_linked_page_number) + 1) + return getattr(self, '_m_num_entries', None) + + def _invalidate_num_entries(self): + del self._m_num_entries + @property + def pointer_map_page_entries_max(self): + if hasattr(self, '_m_pointer_map_page_entries_max'): + return self._m_pointer_map_page_entries_max + + self._m_pointer_map_page_entries_max = self._root.header.usable_size // 5 + return getattr(self, '_m_pointer_map_page_entries_max', None) + + def _invalidate_pointer_map_page_entries_max(self): + del self._m_pointer_map_page_entries_max + + class IndexInteriorCell(ReadWriteKaitaiStruct): + """ + .. seealso:: + Source - https://www.sqlite.org/fileformat2.html#b_tree_pages + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.left_child_page = Sqlite3.BtreePagePointer(self._io, self, self._root) + self.left_child_page._read() + self.payload_size = vlq_base128_be.VlqBase128Be(self._io) + self.payload_size._read() + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload = Sqlite3.Record(self._io, self, self._root) + self.payload._read() + elif _on == 1: + pass + self.payload = Sqlite3.OverflowRecord(self.payload_size.value, self._root.header.index_max_overflow_payload_size, self._io, self, self._root) + self.payload._read() + + + def _fetch_instances(self): + pass + self.left_child_page._fetch_instances() + self.payload_size._fetch_instances() + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._fetch_instances() + elif _on == 1: + pass + self.payload._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.IndexInteriorCell, self)._write__seq(io) + self.left_child_page._write__seq(self._io) + self.payload_size._write__seq(self._io) + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._write__seq(self._io) + elif _on == 1: + pass + self.payload._write__seq(self._io) + + + def _check(self): + pass + if self.left_child_page._root != self._root: + raise kaitaistruct.ConsistencyError(u"left_child_page", self.left_child_page._root, self._root) + if self.left_child_page._parent != self: + raise kaitaistruct.ConsistencyError(u"left_child_page", self.left_child_page._parent, self) + _on = (1 if (self.payload_size.value > self._root.header.index_max_overflow_payload_size) else 0) + if _on == 0: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + elif _on == 1: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + if (self.payload.payload_size != self.payload_size.value): + raise kaitaistruct.ConsistencyError(u"payload", 
self.payload.payload_size, self.payload_size.value) + if (self.payload.overflow_payload_size_max != self._root.header.index_max_overflow_payload_size): + raise kaitaistruct.ConsistencyError(u"payload", self.payload.overflow_payload_size_max, self._root.header.index_max_overflow_payload_size) + + + class PointerMapEntry(ReadWriteKaitaiStruct): + """ + .. seealso:: + Source - https://www.sqlite.org/fileformat2.html#pointer_map_or_ptrmap_pages + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.type = KaitaiStream.resolve_enum(Sqlite3.PtrmapPageType, self._io.read_u1()) + self.page_number = self._io.read_u4be() + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.PointerMapEntry, self)._write__seq(io) + self._io.write_u1(int(self.type)) + self._io.write_u4be(self.page_number) + + + def _check(self): + pass + + + class StringUtf8(ReadWriteKaitaiStruct): + def __init__(self, len_value, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.len_value = len_value + + def _read(self): + self.value = (self._io.read_bytes(self.len_value)).decode("UTF-8") + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.StringUtf8, self)._write__seq(io) + self._io.write_bytes((self.value).encode(u"UTF-8")) + + + def _check(self): + pass + if (len((self.value).encode(u"UTF-8")) != self.len_value): + raise kaitaistruct.ConsistencyError(u"value", len((self.value).encode(u"UTF-8")), self.len_value) + + + class RecordHeader(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.value_types = [] + i = 0 + while not self._io.is_eof(): + _t_value_types = Sqlite3.SerialType(self._io, self, self._root) + _t_value_types._read() + self.value_types.append(_t_value_types) + i += 1 + + + + def _fetch_instances(self): + pass + for i in range(len(self.value_types)): + pass + self.value_types[i]._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.RecordHeader, self)._write__seq(io) + for i in range(len(self.value_types)): + pass + if self._io.is_eof(): + raise kaitaistruct.ConsistencyError(u"value_types", self._io.size() - self._io.pos(), 0) + self.value_types[i]._write__seq(self._io) + + if not self._io.is_eof(): + raise kaitaistruct.ConsistencyError(u"value_types", self._io.size() - self._io.pos(), 0) + + + def _check(self): + pass + for i in range(len(self.value_types)): + pass + if self.value_types[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"value_types", self.value_types[i]._root, self._root) + if self.value_types[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"value_types", self.value_types[i]._parent, self) + + + + class StringUtf16Le(ReadWriteKaitaiStruct): + def __init__(self, len_value, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.len_value = len_value + + def _read(self): + self.value = (self._io.read_bytes(self.len_value)).decode("UTF-16LE") + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.StringUtf16Le, self)._write__seq(io) + self._io.write_bytes((self.value).encode(u"UTF-16LE")) + + + def _check(self): + pass + if (len((self.value).encode(u"UTF-16LE")) != self.len_value): + raise 
kaitaistruct.ConsistencyError(u"value", len((self.value).encode(u"UTF-16LE")), self.len_value) + + + class TableInteriorCell(ReadWriteKaitaiStruct): + """ + .. seealso:: + Source - https://www.sqlite.org/fileformat2.html#b_tree_pages + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.left_child_page = Sqlite3.BtreePagePointer(self._io, self, self._root) + self.left_child_page._read() + self.row_id = vlq_base128_be.VlqBase128Be(self._io) + self.row_id._read() + + + def _fetch_instances(self): + pass + self.left_child_page._fetch_instances() + self.row_id._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.TableInteriorCell, self)._write__seq(io) + self.left_child_page._write__seq(self._io) + self.row_id._write__seq(self._io) + + + def _check(self): + pass + if self.left_child_page._root != self._root: + raise kaitaistruct.ConsistencyError(u"left_child_page", self.left_child_page._root, self._root) + if self.left_child_page._parent != self: + raise kaitaistruct.ConsistencyError(u"left_child_page", self.left_child_page._parent, self) + + + class DatabaseHeader(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.magic = self._io.read_bytes(16) + if not (self.magic == b"\x53\x51\x4C\x69\x74\x65\x20\x66\x6F\x72\x6D\x61\x74\x20\x33\x00"): + raise kaitaistruct.ValidationNotEqualError(b"\x53\x51\x4C\x69\x74\x65\x20\x66\x6F\x72\x6D\x61\x74\x20\x33\x00", self.magic, self._io, u"/types/database_header/seq/0") + self.page_size_raw = self._io.read_u2be() + self.write_version = KaitaiStream.resolve_enum(Sqlite3.FormatVersion, self._io.read_u1()) + self.read_version = KaitaiStream.resolve_enum(Sqlite3.FormatVersion, self._io.read_u1()) + self.page_reserved_space_size = self._io.read_u1() + self.max_payload_fraction = self._io.read_u1() + self.min_payload_fraction = self._io.read_u1() + self.leaf_payload_fraction = self._io.read_u1() + self.file_change_counter = self._io.read_u4be() + self.num_pages = self._io.read_u4be() + self.first_freelist_trunk_page = Sqlite3.FreelistTrunkPagePointer(self._io, self, self._root) + self.first_freelist_trunk_page._read() + self.num_freelist_pages = self._io.read_u4be() + self.schema_cookie = self._io.read_u4be() + self.schema_format = self._io.read_u4be() + self.default_page_cache_size = self._io.read_u4be() + self.largest_root_page = self._io.read_u4be() + self.text_encoding = self._io.read_u4be() + self.user_version = self._io.read_u4be() + self.is_incremental_vacuum = self._io.read_u4be() + self.application_id = self._io.read_u4be() + self.reserved_header_bytes = self._io.read_bytes(20) + self.version_valid_for = self._io.read_u4be() + self.sqlite_version_number = self._io.read_u4be() + + + def _fetch_instances(self): + pass + self.first_freelist_trunk_page._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.DatabaseHeader, self)._write__seq(io) + self._io.write_bytes(self.magic) + self._io.write_u2be(self.page_size_raw) + self._io.write_u1(int(self.write_version)) + self._io.write_u1(int(self.read_version)) + self._io.write_u1(self.page_reserved_space_size) + self._io.write_u1(self.max_payload_fraction) + self._io.write_u1(self.min_payload_fraction) + self._io.write_u1(self.leaf_payload_fraction) + self._io.write_u4be(self.file_change_counter) + self._io.write_u4be(self.num_pages) + 
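+            # note: this writes only the 4-byte trunk page number; the trunk
+            # page content itself is serialized into a child stream and flushed
+            # later by write_back_child_streams()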
self.first_freelist_trunk_page._write__seq(self._io) + self._io.write_u4be(self.num_freelist_pages) + self._io.write_u4be(self.schema_cookie) + self._io.write_u4be(self.schema_format) + self._io.write_u4be(self.default_page_cache_size) + self._io.write_u4be(self.largest_root_page) + self._io.write_u4be(self.text_encoding) + self._io.write_u4be(self.user_version) + self._io.write_u4be(self.is_incremental_vacuum) + self._io.write_u4be(self.application_id) + self._io.write_bytes(self.reserved_header_bytes) + self._io.write_u4be(self.version_valid_for) + self._io.write_u4be(self.sqlite_version_number) + + + def _check(self): + pass + if (len(self.magic) != 16): + raise kaitaistruct.ConsistencyError(u"magic", len(self.magic), 16) + if not (self.magic == b"\x53\x51\x4C\x69\x74\x65\x20\x66\x6F\x72\x6D\x61\x74\x20\x33\x00"): + raise kaitaistruct.ValidationNotEqualError(b"\x53\x51\x4C\x69\x74\x65\x20\x66\x6F\x72\x6D\x61\x74\x20\x33\x00", self.magic, None, u"/types/database_header/seq/0") + if self.first_freelist_trunk_page._root != self._root: + raise kaitaistruct.ConsistencyError(u"first_freelist_trunk_page", self.first_freelist_trunk_page._root, self._root) + if self.first_freelist_trunk_page._parent != self: + raise kaitaistruct.ConsistencyError(u"first_freelist_trunk_page", self.first_freelist_trunk_page._parent, self) + if (len(self.reserved_header_bytes) != 20): + raise kaitaistruct.ConsistencyError(u"reserved_header_bytes", len(self.reserved_header_bytes), 20) + + @property + def num_ptrmap_pages(self): + """The number of ptrmap pages in the database.""" + if hasattr(self, '_m_num_ptrmap_pages'): + return self._m_num_ptrmap_pages + + self._m_num_ptrmap_pages = ((self.num_pages // self.num_ptrmap_entries_max + 1) if (self.idx_first_ptrmap_page > 0) else 0) + return getattr(self, '_m_num_ptrmap_pages', None) + + def _invalidate_num_ptrmap_pages(self): + del self._m_num_ptrmap_pages + @property + def idx_last_ptrmap_page(self): + """The index (0-based) of the last ptrmap page (inclusive).""" + if hasattr(self, '_m_idx_last_ptrmap_page'): + return self._m_idx_last_ptrmap_page + + self._m_idx_last_ptrmap_page = ((self.idx_first_ptrmap_page + self.num_ptrmap_pages) - (0 if ((self.idx_first_ptrmap_page + self.num_ptrmap_pages) >= self.idx_lock_byte_page) else 1)) + return getattr(self, '_m_idx_last_ptrmap_page', None) + + def _invalidate_idx_last_ptrmap_page(self): + del self._m_idx_last_ptrmap_page + @property + def idx_first_ptrmap_page(self): + """The index (0-based) of the first ptrmap page.""" + if hasattr(self, '_m_idx_first_ptrmap_page'): + return self._m_idx_first_ptrmap_page + + self._m_idx_first_ptrmap_page = (1 if (self.largest_root_page > 0) else 0) + return getattr(self, '_m_idx_first_ptrmap_page', None) + + def _invalidate_idx_first_ptrmap_page(self): + del self._m_idx_first_ptrmap_page + @property + def overflow_min_payload_size(self): + """The minimum amount of payload that must be stored on the btree page before spilling is allowed.""" + if hasattr(self, '_m_overflow_min_payload_size'): + return self._m_overflow_min_payload_size + + self._m_overflow_min_payload_size = (((self.usable_size - 12) * 32) // 255 - 23) + return getattr(self, '_m_overflow_min_payload_size', None) + + def _invalidate_overflow_min_payload_size(self): + del self._m_overflow_min_payload_size + @property + def num_ptrmap_entries_max(self): + """The maximum number of ptrmap entries per ptrmap page.""" + if hasattr(self, '_m_num_ptrmap_entries_max'): + return self._m_num_ptrmap_entries_max + + 
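+            # each entry is 5 bytes (1-byte type + 4-byte page number), so
+            # usable_size // 5 entries fit on one ptrmap page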
self._m_num_ptrmap_entries_max = self.usable_size // 5 + return getattr(self, '_m_num_ptrmap_entries_max', None) + + def _invalidate_num_ptrmap_entries_max(self): + del self._m_num_ptrmap_entries_max + @property + def idx_lock_byte_page(self): + if hasattr(self, '_m_idx_lock_byte_page'): + return self._m_idx_lock_byte_page + + self._m_idx_lock_byte_page = 1073741824 // self.page_size + return getattr(self, '_m_idx_lock_byte_page', None) + + def _invalidate_idx_lock_byte_page(self): + del self._m_idx_lock_byte_page + @property + def page_size(self): + """The database page size in bytes.""" + if hasattr(self, '_m_page_size'): + return self._m_page_size + + self._m_page_size = (65536 if (self.page_size_raw == 1) else self.page_size_raw) + return getattr(self, '_m_page_size', None) + + def _invalidate_page_size(self): + del self._m_page_size + @property + def table_max_overflow_payload_size(self): + """The maximum amount of payload that can be stored directly on the b-tree page without spilling onto an overflow page. Value for table page.""" + if hasattr(self, '_m_table_max_overflow_payload_size'): + return self._m_table_max_overflow_payload_size + + self._m_table_max_overflow_payload_size = (self.usable_size - 35) + return getattr(self, '_m_table_max_overflow_payload_size', None) + + def _invalidate_table_max_overflow_payload_size(self): + del self._m_table_max_overflow_payload_size + @property + def index_max_overflow_payload_size(self): + """The maximum amount of payload that can be stored directly on the b-tree page without spilling onto an overflow page. Value for index page.""" + if hasattr(self, '_m_index_max_overflow_payload_size'): + return self._m_index_max_overflow_payload_size + + self._m_index_max_overflow_payload_size = (((self.usable_size - 12) * 64) // 255 - 23) + return getattr(self, '_m_index_max_overflow_payload_size', None) + + def _invalidate_index_max_overflow_payload_size(self): + del self._m_index_max_overflow_payload_size + @property + def usable_size(self): + """The "usable size" of a database page.""" + if hasattr(self, '_m_usable_size'): + return self._m_usable_size + + self._m_usable_size = (self.page_size - self.page_reserved_space_size) + return getattr(self, '_m_usable_size', None) + + def _invalidate_usable_size(self): + del self._m_usable_size + + class TableLeafCell(ReadWriteKaitaiStruct): + """ + .. 
seealso:: + Source - https://www.sqlite.org/fileformat2.html#b_tree_pages + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.payload_size = vlq_base128_be.VlqBase128Be(self._io) + self.payload_size._read() + self.row_id = vlq_base128_be.VlqBase128Be(self._io) + self.row_id._read() + _on = (1 if (self.payload_size.value > self._root.header.table_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload = Sqlite3.Record(self._io, self, self._root) + self.payload._read() + elif _on == 1: + pass + self.payload = Sqlite3.OverflowRecord(self.payload_size.value, self._root.header.table_max_overflow_payload_size, self._io, self, self._root) + self.payload._read() + + + def _fetch_instances(self): + pass + self.payload_size._fetch_instances() + self.row_id._fetch_instances() + _on = (1 if (self.payload_size.value > self._root.header.table_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._fetch_instances() + elif _on == 1: + pass + self.payload._fetch_instances() + + + def _write__seq(self, io=None): + super(Sqlite3.TableLeafCell, self)._write__seq(io) + self.payload_size._write__seq(self._io) + self.row_id._write__seq(self._io) + _on = (1 if (self.payload_size.value > self._root.header.table_max_overflow_payload_size) else 0) + if _on == 0: + pass + self.payload._write__seq(self._io) + elif _on == 1: + pass + self.payload._write__seq(self._io) + + + def _check(self): + pass + _on = (1 if (self.payload_size.value > self._root.header.table_max_overflow_payload_size) else 0) + if _on == 0: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + elif _on == 1: + pass + if self.payload._root != self._root: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._root, self._root) + if self.payload._parent != self: + raise kaitaistruct.ConsistencyError(u"payload", self.payload._parent, self) + if (self.payload.payload_size != self.payload_size.value): + raise kaitaistruct.ConsistencyError(u"payload", self.payload.payload_size, self.payload_size.value) + if (self.payload.overflow_payload_size_max != self._root.header.table_max_overflow_payload_size): + raise kaitaistruct.ConsistencyError(u"payload", self.payload.overflow_payload_size_max, self._root.header.table_max_overflow_payload_size) + + + class CellPointer(ReadWriteKaitaiStruct): + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.ofs_content = self._io.read_u2be() + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.CellPointer, self)._write__seq(io) + self._io.write_u2be(self.ofs_content) + + + def _check(self): + pass + + + class Value(ReadWriteKaitaiStruct): + def __init__(self, serial_type, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.serial_type = serial_type + + def _read(self): + _on = self.serial_type.type + if _on == Sqlite3.Serial.integer_0: + pass + self.value = Sqlite3.Int0(self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.two_comp_24: + pass + self.value = self._io.read_bits_int_be(24) + elif _on == Sqlite3.Serial.nil: + pass + self.value = Sqlite3.NullValue(self._io, self, 
self._root) + self.value._read() + elif _on == Sqlite3.Serial.blob: + pass + self.value = Sqlite3.Blob(self.serial_type.len_blob_string, self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.string_utf8: + pass + self.value = Sqlite3.StringUtf8(self.serial_type.len_blob_string, self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.two_comp_16: + pass + self.value = self._io.read_s2be() + elif _on == Sqlite3.Serial.ieee754_64: + pass + self.value = self._io.read_f8be() + elif _on == Sqlite3.Serial.two_comp_8: + pass + self.value = self._io.read_s1() + elif _on == Sqlite3.Serial.string_utf16_be: + pass + self.value = Sqlite3.StringUtf16Be(self.serial_type.len_blob_string, self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.two_comp_48: + pass + self.value = self._io.read_bits_int_be(48) + elif _on == Sqlite3.Serial.integer_1: + pass + self.value = Sqlite3.Int1(self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.string_utf16_le: + pass + self.value = Sqlite3.StringUtf16Le(self.serial_type.len_blob_string, self._io, self, self._root) + self.value._read() + elif _on == Sqlite3.Serial.two_comp_32: + pass + self.value = self._io.read_s4be() + elif _on == Sqlite3.Serial.two_comp_64: + pass + self.value = self._io.read_s8be() + + + def _fetch_instances(self): + pass + _on = self.serial_type.type + if _on == Sqlite3.Serial.integer_0: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.two_comp_24: + pass + elif _on == Sqlite3.Serial.nil: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.blob: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.string_utf8: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.two_comp_16: + pass + elif _on == Sqlite3.Serial.ieee754_64: + pass + elif _on == Sqlite3.Serial.two_comp_8: + pass + elif _on == Sqlite3.Serial.string_utf16_be: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.two_comp_48: + pass + elif _on == Sqlite3.Serial.integer_1: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.string_utf16_le: + pass + self.value._fetch_instances() + elif _on == Sqlite3.Serial.two_comp_32: + pass + elif _on == Sqlite3.Serial.two_comp_64: + pass + + + def _write__seq(self, io=None): + super(Sqlite3.Value, self)._write__seq(io) + _on = self.serial_type.type + if _on == Sqlite3.Serial.integer_0: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.two_comp_24: + pass + self._io.write_bits_int_be(24, self.value) + elif _on == Sqlite3.Serial.nil: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.blob: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.string_utf8: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.two_comp_16: + pass + self._io.write_s2be(self.value) + elif _on == Sqlite3.Serial.ieee754_64: + pass + self._io.write_f8be(self.value) + elif _on == Sqlite3.Serial.two_comp_8: + pass + self._io.write_s1(self.value) + elif _on == Sqlite3.Serial.string_utf16_be: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.two_comp_48: + pass + self._io.write_bits_int_be(48, self.value) + elif _on == Sqlite3.Serial.integer_1: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.string_utf16_le: + pass + self.value._write__seq(self._io) + elif _on == Sqlite3.Serial.two_comp_32: + pass + self._io.write_s4be(self.value) + elif _on == Sqlite3.Serial.two_comp_64: + 
pass + self._io.write_s8be(self.value) + + + def _check(self): + pass + _on = self.serial_type.type + if _on == Sqlite3.Serial.integer_0: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + elif _on == Sqlite3.Serial.two_comp_24: + pass + elif _on == Sqlite3.Serial.nil: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + elif _on == Sqlite3.Serial.blob: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + if (self.value.len_value != self.serial_type.len_blob_string): + raise kaitaistruct.ConsistencyError(u"value", self.value.len_value, self.serial_type.len_blob_string) + elif _on == Sqlite3.Serial.string_utf8: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + if (self.value.len_value != self.serial_type.len_blob_string): + raise kaitaistruct.ConsistencyError(u"value", self.value.len_value, self.serial_type.len_blob_string) + elif _on == Sqlite3.Serial.two_comp_16: + pass + elif _on == Sqlite3.Serial.ieee754_64: + pass + elif _on == Sqlite3.Serial.two_comp_8: + pass + elif _on == Sqlite3.Serial.string_utf16_be: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + if (self.value.len_value != self.serial_type.len_blob_string): + raise kaitaistruct.ConsistencyError(u"value", self.value.len_value, self.serial_type.len_blob_string) + elif _on == Sqlite3.Serial.two_comp_48: + pass + elif _on == Sqlite3.Serial.integer_1: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + elif _on == Sqlite3.Serial.string_utf16_le: + pass + if self.value._root != self._root: + raise kaitaistruct.ConsistencyError(u"value", self.value._root, self._root) + if self.value._parent != self: + raise kaitaistruct.ConsistencyError(u"value", self.value._parent, self) + if (self.value.len_value != self.serial_type.len_blob_string): + raise kaitaistruct.ConsistencyError(u"value", self.value.len_value, self.serial_type.len_blob_string) + elif _on == Sqlite3.Serial.two_comp_32: + pass + elif _on == Sqlite3.Serial.two_comp_64: + pass + + + class Blob(ReadWriteKaitaiStruct): + def __init__(self, len_value, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + self.len_value = len_value + + def _read(self): + self.value = self._io.read_bytes(self.len_value) + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(Sqlite3.Blob, self)._write__seq(io) + self._io.write_bytes(self.value) + + + def _check(self): + pass + if (len(self.value) != self.len_value): + raise kaitaistruct.ConsistencyError(u"value", 
len(self.value), self.len_value) + + + class Record(ReadWriteKaitaiStruct): + """ + .. seealso:: + Source - https://sqlite.org/fileformat2.html#record_format + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.header_size = vlq_base128_be.VlqBase128Be(self._io) + self.header_size._read() + self._raw_header = self._io.read_bytes((self.header_size.value - 1)) + _io__raw_header = KaitaiStream(BytesIO(self._raw_header)) + self.header = Sqlite3.RecordHeader(_io__raw_header, self, self._root) + self.header._read() + self.values = [] + for i in range(len(self.header.value_types)): + _t_values = Sqlite3.Value(self.header.value_types[i], self._io, self, self._root) + _t_values._read() + self.values.append(_t_values) + + + + def _fetch_instances(self): + pass + self.header_size._fetch_instances() + self.header._fetch_instances() + for i in range(len(self.values)): + pass + self.values[i]._fetch_instances() + + + + def _write__seq(self, io=None): + super(Sqlite3.Record, self)._write__seq(io) + self.header_size._write__seq(self._io) + _io__raw_header = KaitaiStream(BytesIO(bytearray((self.header_size.value - 1)))) + self._io.add_child_stream(_io__raw_header) + _pos2 = self._io.pos() + self._io.seek(self._io.pos() + ((self.header_size.value - 1))) + def handler(parent, _io__raw_header=_io__raw_header): + self._raw_header = _io__raw_header.to_byte_array() + if (len(self._raw_header) != (self.header_size.value - 1)): + raise kaitaistruct.ConsistencyError(u"raw(header)", len(self._raw_header), (self.header_size.value - 1)) + parent.write_bytes(self._raw_header) + _io__raw_header.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.header._write__seq(_io__raw_header) + for i in range(len(self.values)): + pass + self.values[i]._write__seq(self._io) + + + + def _check(self): + pass + if self.header._root != self._root: + raise kaitaistruct.ConsistencyError(u"header", self.header._root, self._root) + if self.header._parent != self: + raise kaitaistruct.ConsistencyError(u"header", self.header._parent, self) + if (len(self.values) != len(self.header.value_types)): + raise kaitaistruct.ConsistencyError(u"values", len(self.values), len(self.header.value_types)) + for i in range(len(self.values)): + pass + if self.values[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"values", self.values[i]._root, self._root) + if self.values[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"values", self.values[i]._parent, self) + if self.values[i].serial_type != self.header.value_types[i]: + raise kaitaistruct.ConsistencyError(u"values", self.values[i].serial_type, self.header.value_types[i]) + + + + @property + def pages(self): + """This works well when parsing small database files. + + problem: + the first access to db.pages + for example `db.pages[0]` + will loop and parse **all** pages. + + To parse large database files, + the user should set + the internal cache attribute `db._m_pages` + so that any access to `db.pages` + will use the cached value in `db._m_pages`. 
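+
+        The `PagesList` sketch below re-parses a page on every access
+        (see the "todo: maybe cache page" note in it). A caching wrapper
+        is a straightforward extension; this is a sketch, and the
+        `_page_cache` dict is a hypothetical attribute, not part of the
+        generated class:
+
+        class CachedPagesList(PagesList):
+            def __init__(self, db):
+                super().__init__(db)
+                self._page_cache = {}  # page index -> parsed page object
+            def __getitem__(self, i):
+                if i < 0:  # normalize negative indices before caching
+                    i += len(self)
+                if i not in self._page_cache:
+                    self._page_cache[i] = super().__getitem__(i)
+                return self._page_cache[i]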
+ + # import sqlite3.py generated from sqlite3.ksy + import parser.sqlite3 as parser_sqlite3 + # create a lazy list class + # accessing db.pages[i] will call pages_list.__getitem__(i) + class PagesList: + def __init__(self, db): + self.db = db + def __len__(self): + return self.db.header.num_pages + def __getitem__(self, i): # i is 0-based + db = self.db + header = db.header + if i < 0: # -1 means last page, etc + i = header.num_pages + i + assert ( + 0 <= i and i < header.num_pages + ), f"page index is out of range: {i} is not in (0, {header.num_pages - 1})" + # todo: maybe cache page + # equality test: page_a.page_number == page_b.page_number + _pos = db._io.pos() + db._io.seek(i * header.page_size) + if i == header.idx_lock_byte_page: + page = parser_sqlite3.Sqlite3.LockBytePage((i + 1), db._io, db, db._root) + elif ( + i >= header.idx_first_ptrmap_page and + i <= header.idx_last_ptrmap_page + ): + page = parser_sqlite3.Sqlite3.PtrmapPage((i + 1), db._io, db, db._root) + else: + page = parser_sqlite3.Sqlite3.BtreePage((i + 1), db._io, db, db._root) + db._io.seek(_pos) + return page + # create a database parser + database = "test.db" + db = parser_sqlite3.Sqlite3.from_file(database) + # patch the internal cache attribute of db.pages + db._m_pages = PagesList(db) + db._read() + # now, this will parse **only** the first page + page = db.pages[0] + page._read() + """ + if self._should_write_pages: + self._write_pages() + if hasattr(self, '_m_pages'): + return self._m_pages + + _pos = self._io.pos() + self._io.seek(100) + self._raw__m_pages = [] + self._m_pages = [] + for i in range(self.header.num_pages): + _on = (0 if (i == self.header.idx_lock_byte_page) else (1 if (((i >= self.header.idx_first_ptrmap_page)) and ((i <= self.header.idx_last_ptrmap_page))) else 2)) + if _on == 0: + pass + self._raw__m_pages.append(self._io.read_bytes(((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + _io__raw__m_pages = KaitaiStream(BytesIO(self._raw__m_pages[-1])) + _t__m_pages = Sqlite3.LockBytePage((i + 1), _io__raw__m_pages, self, self._root) + _t__m_pages._read() + self._m_pages.append(_t__m_pages) + elif _on == 1: + pass + self._raw__m_pages.append(self._io.read_bytes(((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + _io__raw__m_pages = KaitaiStream(BytesIO(self._raw__m_pages[-1])) + _t__m_pages = Sqlite3.PointerMapPage((i + 1), _io__raw__m_pages, self, self._root) + _t__m_pages._read() + self._m_pages.append(_t__m_pages) + elif _on == 2: + pass + self._raw__m_pages.append(self._io.read_bytes(((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + _io__raw__m_pages = KaitaiStream(BytesIO(self._raw__m_pages[-1])) + _t__m_pages = Sqlite3.BtreePage((i + 1), _io__raw__m_pages, self, self._root) + _t__m_pages._read() + self._m_pages.append(_t__m_pages) + else: + pass + self._m_pages.append(self._io.read_bytes(((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + + self._io.seek(_pos) + return getattr(self, '_m_pages', None) + + @pages.setter + def pages(self, v): + self._m_pages = v + + def _write_pages(self): + self._should_write_pages = False + _pos = self._io.pos() + self._io.seek(100) + self._raw__m_pages = [] + for i in range(len(self._m_pages)): + pass + _on = (0 if (i == self.header.idx_lock_byte_page) else (1 if (((i >= self.header.idx_first_ptrmap_page)) and ((i <= self.header.idx_last_ptrmap_page))) else 2)) + if _on == 0: + pass + _io__raw__m_pages = KaitaiStream(BytesIO(bytearray(((self.header.page_size - 
100) if (i == 0) else self.header.page_size)))) + self._io.add_child_stream(_io__raw__m_pages) + _pos2 = self._io.pos() + self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) + if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): + raise kaitaistruct.ConsistencyError(u"raw(pages)", len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) + parent.write_bytes(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) + _io__raw__m_pages.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.pages[i]._write__seq(_io__raw__m_pages) + elif _on == 1: + pass + _io__raw__m_pages = KaitaiStream(BytesIO(bytearray(((self.header.page_size - 100) if (i == 0) else self.header.page_size)))) + self._io.add_child_stream(_io__raw__m_pages) + _pos2 = self._io.pos() + self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) + if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): + raise kaitaistruct.ConsistencyError(u"raw(pages)", len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) + parent.write_bytes(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) + _io__raw__m_pages.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.pages[i]._write__seq(_io__raw__m_pages) + elif _on == 2: + pass + _io__raw__m_pages = KaitaiStream(BytesIO(bytearray(((self.header.page_size - 100) if (i == 0) else self.header.page_size)))) + self._io.add_child_stream(_io__raw__m_pages) + _pos2 = self._io.pos() + self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) + def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) + if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): + raise kaitaistruct.ConsistencyError(u"raw(pages)", len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) + parent.write_bytes(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) + _io__raw__m_pages.write_back_handler = KaitaiStream.WriteBackHandler(_pos2, handler) + self.pages[i]._write__seq(_io__raw__m_pages) + else: + pass + self._io.write_bytes(self.pages[i]) + + self._io.seek(_pos) + + + def _check_pages(self): + pass + if (len(self.pages) != self.header.num_pages): + raise kaitaistruct.ConsistencyError(u"pages", len(self.pages), self.header.num_pages) + for i in range(len(self._m_pages)): + pass + _on = (0 if (i == self.header.idx_lock_byte_page) else (1 if (((i >= self.header.idx_first_ptrmap_page)) and ((i <= self.header.idx_last_ptrmap_page))) else 2)) + if _on == 0: + pass + if self.pages[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._root, self._root) + if self.pages[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._parent, self) + if 
(self.pages[i].page_number != (i + 1)): + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i].page_number, (i + 1)) + elif _on == 1: + pass + if self.pages[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._root, self._root) + if self.pages[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._parent, self) + if (self.pages[i].pointer_map_page_number != (i + 1)): + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i].pointer_map_page_number, (i + 1)) + elif _on == 2: + pass + if self.pages[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._root, self._root) + if self.pages[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i]._parent, self) + if (self.pages[i].page_number != (i + 1)): + raise kaitaistruct.ConsistencyError(u"pages", self.pages[i].page_number, (i + 1)) + else: + pass + if (len(self.pages[i]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): + raise kaitaistruct.ConsistencyError(u"pages", len(self.pages[i]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) + + + diff --git a/serialize_py/kaitaistruct_sqlite3.py.sh b/serialize_py/kaitaistruct_sqlite3.py.sh new file mode 100755 index 0000000..220f331 --- /dev/null +++ b/serialize_py/kaitaistruct_sqlite3.py.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -eux + +false && +if [ -e kaitaistruct_sqlite3.py ]; then + echo "keeping kaitaistruct_sqlite3.py" + exit +fi + +if ! [ -e kaitai_struct_formats/ ]; then + if false; then + git clone --depth=1 https://github.com/kaitai-io/kaitai_struct_formats + else + # sqlite3.ksy with support for lazy pages + # needed to parse large databases (larger than RAM) + # https://github.com/kaitai-io/kaitai_struct_formats/pull/661 + git clone --depth=1 https://github.com/milahu/kaitai_struct_formats --branch fix-sqlite3 + fi +fi + +# NOTE this requires kaitai-struct-compiler with serialization support +# which can be installed with nix-shell +# https://github.com/kaitai-io/kaitai_struct/issues/1060 +# https://github.com/kaitai-io/kaitai_struct_compiler/tree/serialization + +kaitai-struct-compiler --read-write --no-auto-read --target python --import-path kaitai_struct_formats/ kaitai_struct_formats/database/sqlite3.ksy +mv sqlite3.py kaitaistruct_sqlite3.py diff --git a/serialize_py/pyvlq.py b/serialize_py/pyvlq.py new file mode 100644 index 0000000..bee2016 --- /dev/null +++ b/serialize_py/pyvlq.py @@ -0,0 +1,43 @@ +# based on https://github.com/osoken/pyvlq/blob/main/src/pyvlq/core.py + +from io import BytesIO + + +def encode(value: int) -> bytes: + """Encode a value into a VLQ byte sequence.""" + if value < 0: + raise ValueError("Value must be non-negative") + if value == 0: + return b"\x00" + result = bytearray() + while value: + byte = value & 0x7F + value >>= 7 + result.append(byte | 0x80) + result.reverse() + result[-1] &= 0x7F + return bytes(result) + + +def decode(data: bytes) -> int: + """Decode a VLQ byte sequence into a value.""" + value = 0 + for byte in data: + value <<= 7 + value |= byte & 0x7F + if not byte & 0x80: + return value + raise ValueError("Malformed VLQ byte sequence") + + +def decode_stream(data: BytesIO) -> int: + """Decode a VLQ byte sequence into a value.""" + value = 0 + while True: + byte = data.read(1) + if not byte: + raise ValueError("Malformed VLQ byte sequence") + value <<= 7 + value |= byte[0] & 0x7F + if not byte[0] & 0x80: + return value diff --git a/serialize_py/shell.nix 
b/serialize_py/shell.nix new file mode 100644 index 0000000..8345ea7 --- /dev/null +++ b/serialize_py/shell.nix @@ -0,0 +1,34 @@ +{ + pkgs ? import { }, +}: + +let + nur = + pkgs.nur or + ( + import (builtins.fetchTarball "https://github.com/nix-community/NUR/archive/main.tar.gz") { + inherit pkgs; + repoOverrides = { + milahu = import (pkgs.fetchFromGitHub { + owner = "milahu"; + repo = "nur-packages"; + rev = "2d2ec41f5f1a416442321a0383f34b31e79b3bc3"; + hash = "sha256-Dd0kX0ey0A/IQcKyuxZ63mnjUTopwSaV8tMu3krsvAY="; + fetchSubmodules = true; + }) { inherit pkgs; }; + }; + } + ); +in + +pkgs.mkShell { + buildInputs = with pkgs; [ + diffutils + tinyxxd + nur.repos.milahu.kaitai-struct-compiler + (python3.withPackages (pp: with pp; [ + # requests + nur.repos.milahu.python3.pkgs.kaitaistruct + ])) + ]; +} diff --git a/serialize_py/sqlite3.ksy b/serialize_py/sqlite3.ksy new file mode 100644 index 0000000..664b0fb --- /dev/null +++ b/serialize_py/sqlite3.ksy @@ -0,0 +1,699 @@ +meta: + id: sqlite3 + title: SQLite3 database file + file-extension: + - sqlite + - db + - db3 + - sqlite3 + xref: + forensicswiki: SQLite_database_format + justsolve: SQLite + loc: fdd000461 + pronom: fmt/729 + wikidata: Q28600453 + license: CC0-1.0 + imports: + - /common/vlq_base128_be + endian: be +doc: | + SQLite3 is a popular serverless SQL engine, implemented as a library + to be used within other applications. It keeps its databases as + regular disk files. + + Every database file is segmented into pages. First page (starting at + the very beginning) is special: it contains a file-global header + which specifies some data relevant to proper parsing (i.e. format + versions, size of page, etc). After the header, normal contents of + the first page follow. + + Each page would be of some type (btree, ptrmap, lock_byte, or free), + and generally, they would be reached via the links starting from the + first page. The first page is always a btree page for the implicitly + defined `sqlite_schema` table. + + This works well when parsing small database files. To parse large + database files, see the documentation for /instances/pages. + + Further documentation: + + - https://www.sqlite.org/arch.html + - https://medium.com/the-polyglot-programmer/what-would-sqlite-look-like-if-written-in-rust-part-3-edd2eefda473 + - https://cstack.github.io/db_tutorial/parts/part7.html + + Original sources: + + - https://github.com/sqlite/sqlite/blob/master/src/btree.h + - https://github.com/sqlite/sqlite/blob/master/src/btree.c +doc-ref: https://www.sqlite.org/fileformat2.html +seq: + - id: header + type: database_header + # no. allow parsing only the header to save memory + # - id: first_page + # type: btree_page(1) +instances: + pages: + type: + switch-on: '(_index == header.idx_lock_byte_page ? 0 : _index >= header.idx_first_ptrmap_page and _index <= header.idx_last_ptrmap_page ? 1 : 2)' + cases: + 0: lock_byte_page(_index + 1) + 1: pointer_map_page(_index + 1) + # TODO: Free pages and cell overflow pages are incorrectly interpreted as btree pages + # This is unfortunate, but unavoidable since there's no way to recognize these types at + # this point in the parser. + 2: btree_page(_index + 1) + # The first 100 bytes of the database file comprise the database file header + pos: 100 + # size: header.page_size + # the first page is 100 bytes smaller than all other pages + size: '(_index == 0) ? 
(header.page_size - 100) : header.page_size' + repeat: expr + repeat-expr: header.num_pages + doc: | + This works well when parsing small database files. + + problem: + the first access to db.pages + for example `db.pages[0]` + will loop and parse **all** pages. + + To parse large database files, + the user should set + the internal cache attribute `db._m_pages` + so that any access to `db.pages` + will use the cached value in `db._m_pages`. + + # import sqlite3.py generated from sqlite3.ksy + import parser.sqlite3 as parser_sqlite3 + # create a lazy list class + # accessing db.pages[i] will call pages_list.__getitem__(i) + class PagesList: + def __init__(self, db): + self.db = db + def __len__(self): + return self.db.header.num_pages + def __getitem__(self, i): # i is 0-based + db = self.db + header = db.header + if i < 0: # -1 means last page, etc + i = header.num_pages + i + assert ( + 0 <= i and i < header.num_pages + ), f"page index is out of range: {i} is not in (0, {header.num_pages - 1})" + # todo: maybe cache page + # equality test: page_a.page_number == page_b.page_number + _pos = db._io.pos() + db._io.seek(i * header.page_size) + if i == header.idx_lock_byte_page: + page = parser_sqlite3.Sqlite3.LockBytePage((i + 1), db._io, db, db._root) + elif ( + i >= header.idx_first_ptrmap_page and + i <= header.idx_last_ptrmap_page + ): + page = parser_sqlite3.Sqlite3.PtrmapPage((i + 1), db._io, db, db._root) + else: + page = parser_sqlite3.Sqlite3.BtreePage((i + 1), db._io, db, db._root) + db._io.seek(_pos) + return page + # create a database parser + database = "test.db" + db = parser_sqlite3.Sqlite3.from_file(database) + # patch the internal cache attribute of db.pages + db._m_pages = PagesList(db) + db._read() + # now, this will parse **only** the first page + page = db.pages[0] + page._read() +types: + database_header: + seq: + - id: magic + contents: ["SQLite format 3", 0] + - id: page_size_raw + type: u2 + doc: | + The database page size in bytes. Must be a power of two between + 512 and 32768 inclusive, or the value 1 representing a page size + of 65536. The interpreted value is available as `page_size`. + - id: write_version + type: u1 + enum: format_version + doc: File format write version + - id: read_version + type: u1 + enum: format_version + doc: File format read version + - id: page_reserved_space_size + type: u1 + doc: Bytes of unused "reserved" space at the end of each page. Usually 0. + - id: max_payload_fraction + type: u1 + doc: Maximum embedded payload fraction. Must be 64. + - id: min_payload_fraction + type: u1 + doc: Minimum embedded payload fraction. Must be 32. + - id: leaf_payload_fraction + type: u1 + doc: Leaf payload fraction. Must be 32. + - id: file_change_counter + type: u4 + - id: num_pages + type: u4 + doc: Size of the database file in pages. The "in-header database size". + - id: first_freelist_trunk_page + type: freelist_trunk_page_pointer + doc: Page number of the first freelist trunk page. + - id: num_freelist_pages + type: u4 + doc: Total number of freelist pages. + - id: schema_cookie + type: u4 + - id: schema_format + type: u4 + doc: The schema format number. Supported schema formats are 1, 2, 3, and 4. + - id: default_page_cache_size + type: u4 + doc: Default page cache size. + - id: largest_root_page + type: u4 + doc: The page number of the largest root b-tree page when in auto-vacuum or incremental-vacuum modes, or zero otherwise. + - id: text_encoding + type: u4 + doc: The database text encoding. A value of 1 means UTF-8. 
A value of 2 means UTF-16le. A value of 3 means UTF-16be. + - id: user_version + type: u4 + doc: The "user version" as read and set by the user_version pragma. + - id: is_incremental_vacuum + type: u4 + doc: True (non-zero) for incremental-vacuum mode. False (zero) otherwise. + - id: application_id + type: u4 + doc: The "Application ID" set by PRAGMA application_id. + - id: reserved_header_bytes + size: 20 + - id: version_valid_for + type: u4 + - id: sqlite_version_number + type: u4 + instances: + page_size: + value: 'page_size_raw == 1 ? 0x10000 : page_size_raw' + doc: The database page size in bytes + usable_size: + value: 'page_size - page_reserved_space_size' + doc: The "usable size" of a database page + overflow_min_payload_size: + value: ((usable_size-12)*32/255)-23 + doc: The minimum amount of payload that must be stored on the btree page before spilling is allowed + table_max_overflow_payload_size: + value: usable_size - 35 + doc: The maximum amount of payload that can be stored directly on the b-tree page without spilling onto an overflow page. Value for table page + index_max_overflow_payload_size: + value: ((usable_size-12)*64/255)-23 + doc: The maximum amount of payload that can be stored directly on the b-tree page without spilling onto an overflow page. Value for index page + idx_lock_byte_page: + value: '1073741824 / page_size' + num_ptrmap_entries_max: + value: usable_size/5 + doc: The maximum number of ptrmap entries per ptrmap page + idx_first_ptrmap_page: + value: 'largest_root_page > 0 ? 1 : 0' + doc: The index (0-based) of the first ptrmap page + num_ptrmap_pages: + value: 'idx_first_ptrmap_page > 0 ? (num_pages / num_ptrmap_entries_max) + 1 : 0' + doc: The number of ptrmap pages in the database + idx_last_ptrmap_page: + value: 'idx_first_ptrmap_page + num_ptrmap_pages - (idx_first_ptrmap_page + num_ptrmap_pages >= idx_lock_byte_page ? 0 : 1)' + doc: The index (0-based) of the last ptrmap page (inclusive) + lock_byte_page: + params: + - id: page_number + type: u4 + seq: [] + doc: | + The lock-byte page is the single page of the database file that contains the bytes at offsets between + 1073741824 and 1073742335, inclusive. A database file that is less than or equal to 1073741824 bytes + in size contains no lock-byte page. A database file larger than 1073741824 contains exactly one + lock-byte page. + The lock-byte page is set aside for use by the operating-system specific VFS implementation in implementing + the database file locking primitives. SQLite does not use the lock-byte page. + pointer_map_page: + params: + - id: pointer_map_page_number + type: u4 + seq: + - id: entries + type: pointer_map_entry + repeat: expr + repeat-expr: num_entries + instances: + first_linked_page_number: + value: pointer_map_page_number + 1 + pointer_map_page_entries_max: + value: _root.header.usable_size / 5 + last_linked_page_number_max: + value: pointer_map_page_number + pointer_map_page_entries_max + last_linked_page_number: + value: | + last_linked_page_number_max <= _root.header.num_pages + ? last_linked_page_number_max + : _root.header.num_pages + num_entries: + value: last_linked_page_number - first_linked_page_number + 1 + doc: | + A ptrmap page contains back-links from child to parent. + See also: /types/pointer_map_entry. + + Pointer map pages (or "ptrmap pages") + are extra pages inserted into the database + to make the operation of auto_vacuum and + incremental_vacuum modes more efficient. 
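+
+      Each ptrmap entry is 5 bytes: a 1-byte entry type followed by a
+      4-byte big-endian parent page number (see /types/pointer_map_entry),
+      which is where the `usable_size / 5` entry count above comes from.
+      A minimal standalone decoder for one entry (a sketch; `page` is
+      assumed to hold the raw bytes of a ptrmap page):
+
+      ```py
+      import struct
+
+      def read_ptrmap_entry(page: bytes, index: int):
+          off = index * 5
+          entry_type = page[off]                               # ptrmap_page_type
+          parent = struct.unpack_from(">I", page, off + 1)[0]  # u4 page number
+          return entry_type, parent
+      ```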
+ + Ptrmap pages must exist in any database file + which has a non-zero largest root b-tree page value + in db.header.largest_root_page. + + If db.header.largest_root_page is zero, + then the database must not contain ptrmap pages. + + The first ptrmap page (on page 2) + will contain back pointer information + for pages 3 through J+2, inclusive. + + The second pointer map page will be on page J+3 + and that ptrmap page will provide back pointer information + for pages J+4 through 2*J+3 inclusive. + + And so forth for the entire database file. + + ```py + page_size = 512 + page_reserved_space_size = 0 + U = usable_size = page_size - page_reserved_space_size # 512 + J = pointer_map_page_entries_max = usable_size // 5 # 102 + + # pointer map 1 + X = 1 + N = pointer_map_page_number_raw = ((X - 1) * J) + 1 + X # 2 + A = first_linked_page_number = N + 1 # 3 + Z = last_linked_page_number = N + J # 104 = J + 2 + + # pointer map 2 + X = 2 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 105 = J + 3 + A = first_linked_page_number = N + 1 # 106 = J + 4 + Z = last_linked_page_number = N + J # 207 = (2 * J) + 3 + + # pointer map 3 + X = 3 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 208 + A = first_linked_page_number = N + 1 # 209 + Z = last_linked_page_number = N + J # 310 + + # pointer map 4 + X = 4 + N = pointer_map_page_number = ((X - 1) * J) + 1 + X # 311 + A = first_linked_page_number = N + 1 # 312 + Z = last_linked_page_number = N + J # 413 + ``` + + actual pointer_map_page_number: + + ```py + NR = pointer_map_page_number_raw = ((X - 1) * J) + 1 + X # 2 + N = pointer_map_page_number = ( + pointer_map_page_number_raw + if (pointer_map_page_number_raw != lock_byte_page_number) + else (pointer_map_page_number_raw + 1) + ) + ``` + doc-ref: https://www.sqlite.org/fileformat2.html#pointer_map_or_ptrmap_pages + pointer_map_entry: + seq: + - id: type + type: u1 + enum: ptrmap_page_type + - id: page_number + type: u4 + doc-ref: https://www.sqlite.org/fileformat2.html#pointer_map_or_ptrmap_pages + btree_page_pointer: + seq: + - id: page_number + type: u4 + instances: + page: + io: _root._io + pos: (page_number - 1) * _root.header.page_size + size: _root.header.page_size + type: btree_page(page_number) + if: page_number != 0 + btree_page: + params: + - id: page_number + type: u4 + seq: + # no. the database header is not part of the first page + # - id: database_header + # type: database_header + # if: page_number == 1 + - id: page_type + type: u1 + enum: btree_page_type + - id: first_freeblock + type: u2 + doc: The start of the first freeblock on the page, or is zero if there are no freeblocks. + - id: num_cells + type: u2 + doc: The number of cells on the page + - id: ofs_cell_content_area_raw + type: u2 + doc: | + The start of the cell content area. A zero value for this integer is interpreted as 65536. + The interpreted value is available as `cell_content_area`. + - id: num_frag_free_bytes + type: u1 + doc: The number of fragmented free bytes within the cell content area. + - id: right_ptr + type: btree_page_pointer + if: page_type == btree_page_type::index_interior_page or page_type == btree_page_type::table_interior_page + doc: | + The right-most pointer. This value appears in the header of interior + b-tree pages only and is omitted from all other pages. + - id: cell_pointers + type: cell_pointer + repeat: expr + repeat-expr: num_cells + instances: + ofs_cell_content_area: + value: 'ofs_cell_content_area_raw == 0 ? 
65536 : ofs_cell_content_area_raw' + cell_content_area: + pos: ofs_cell_content_area + size: _root.header.usable_size - ofs_cell_content_area + reserved_space: + pos: _root.header.page_size - _root.header.page_reserved_space_size + size-eos: true + if: _root.header.page_reserved_space_size != 0 + cell_pointer: + seq: + - id: ofs_content + type: u2 +# FIXME this breaks serialization: +# _io__raw_header = KaitaiStream(BytesIO(bytearray((self.header_size.value - 1)))) +# ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ValueError: negative count +# instances: +# content: +# # ofs_content is relative to page +# pos: ((_parent.page_number - 1) * _root.header.page_size) + ofs_content +# type: +# switch-on: _parent.page_type +# cases: +# btree_page_type::table_leaf_page: table_leaf_cell +# btree_page_type::table_interior_page: table_interior_cell +# btree_page_type::index_leaf_page: index_leaf_cell +# btree_page_type::index_interior_page: index_interior_cell + table_leaf_cell: + doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages' + seq: + - id: payload_size + type: vlq_base128_be + doc: | + total number of bytes of payload, + including any overflow + - id: row_id + type: vlq_base128_be + doc: | + integer key, a.k.a. "rowid" + - id: payload + type: + switch-on: '(payload_size.value > _root.header.table_max_overflow_payload_size ? 1 : 0)' + cases: + 0: record + 1: overflow_record(payload_size.value, _root.header.table_max_overflow_payload_size) + doc: | + The initial portion of the payload + that does not spill to overflow pages. + table_interior_cell: + doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages' + seq: + - id: left_child_page + type: btree_page_pointer + - id: row_id + type: vlq_base128_be + index_leaf_cell: + doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages' + seq: + - id: payload_size + type: vlq_base128_be + - id: payload + type: + switch-on: '(payload_size.value > _root.header.index_max_overflow_payload_size ? 1 : 0)' + cases: + 0: record + 1: overflow_record(payload_size.value, _root.header.index_max_overflow_payload_size) + index_interior_cell: + doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages' + seq: + - id: left_child_page + type: btree_page_pointer + - id: payload_size + type: vlq_base128_be + - id: payload + type: + switch-on: '(payload_size.value > _root.header.index_max_overflow_payload_size ? 1 : 0)' + cases: + 0: record + 1: overflow_record(payload_size.value, _root.header.index_max_overflow_payload_size) + record: + doc-ref: 'https://sqlite.org/fileformat2.html#record_format' + seq: + - id: header_size + type: vlq_base128_be + - id: header + type: record_header + size: header_size.value - 1 + - id: values + type: value(header.value_types[_index]) + repeat: expr + repeat-expr: header.value_types.size + record_header: + seq: + - id: value_types + type: serial_type + repeat: eos + serial_type: + -webide-representation: "{type:dec}" + seq: + - id: raw_value + type: vlq_base128_be + instances: + type: + # Workaround for string encoding: + # 13 + _root.header.text_encoding - 1 + # See type serial: + # 12: blob + # 13: string_utf8 + # 14: string_utf16_le + # 15: string_utf16_be + value: 'raw_value.value >= 12 ? ((raw_value.value % 2 == 0) ? 12 : 13 + _root.header.text_encoding - 1) : raw_value.value' + enum: serial + len_blob_string: + value: '(raw_value.value % 2 == 0) ? 
(raw_value.value - 12) / 2 : (raw_value.value - 13) / 2' + if: raw_value.value >= 12 + value: + params: + - id: serial_type + type: serial_type + seq: + - id: value + type: + switch-on: serial_type.type + cases: + serial::nil: null_value + serial::two_comp_8: s1 + serial::two_comp_16: s2 + serial::two_comp_24: b24 + serial::two_comp_32: s4 + serial::two_comp_48: b48 + serial::two_comp_64: s8 + serial::ieee754_64: f8 + serial::integer_0: int_0 + serial::integer_1: int_1 + serial::blob: blob(serial_type.len_blob_string) + # Workaround for string encoding: + serial::string_utf8: string_utf8(serial_type.len_blob_string) + serial::string_utf16_le: string_utf16_le(serial_type.len_blob_string) + serial::string_utf16_be: string_utf16_be(serial_type.len_blob_string) + null_value: + -webide-representation: "NULL" + seq: [] + int_0: + -webide-representation: "0" + seq: [] + int_1: + -webide-representation: "1" + seq: [] + string_utf8: + params: + - id: len_value + type: u4 + seq: + - id: value + size: len_value + type: str + encoding: UTF-8 + string_utf16_be: + params: + - id: len_value + type: u4 + seq: + - id: value + size: len_value + type: str + encoding: UTF-16BE + string_utf16_le: + params: + - id: len_value + type: u4 + seq: + - id: value + size: len_value + type: str + encoding: UTF-16LE + blob: + params: + - id: len_value + type: u4 + seq: + - id: value + size: len_value + overflow_record: + params: + - id: payload_size + type: u8 + - id: overflow_payload_size_max + type: u8 + seq: + - id: inline_payload + size: '(inline_payload_size <= overflow_payload_size_max ? inline_payload_size : _root.header.overflow_min_payload_size)' + - id: overflow_page_number + type: overflow_page_pointer + doc: | + page number for the first page + of the overflow page list + instances: + inline_payload_size: + value: _root.header.overflow_min_payload_size+((payload_size-_root.header.overflow_min_payload_size)%(_root.header.usable_size-4)) + overflow_page_pointer: + seq: + - id: page_number + type: u4 + instances: + page: + io: _root._io + pos: (page_number - 1) * _root.header.page_size + size: _root.header.page_size + type: overflow_page + if: page_number != 0 + overflow_page: + seq: + - id: next_page_number + type: overflow_page_pointer + - id: content + size: _root.header.page_size - 4 + freelist_trunk_page_pointer: + seq: + - id: page_number + type: u4 + instances: + page: + io: _root._io + pos: (page_number - 1) * _root.header.page_size + size: _root.header.page_size + type: freelist_trunk_page + if: page_number != 0 + freelist_trunk_page: + seq: + - id: next_page + type: freelist_trunk_page_pointer + - id: num_free_pages + type: u4 + - id: free_pages + type: u4 + repeat: expr + repeat-expr: num_free_pages +enums: + format_version: + 1: legacy + 2: wal + btree_page_type: + 0x02: index_interior_page + 0x05: table_interior_page + 0x0a: index_leaf_page + 0x0d: table_leaf_page + ptrmap_page_type: + 1: root_page + 2: free_page + 3: overflow1 + 4: overflow2 + 5: btree + serial: + # Value is a NULL. + 0: nil + # Value is an 8-bit twos-complement integer. + 1: two_comp_8 + # Value is a big-endian 16-bit twos-complement integer. + 2: two_comp_16 + # Value is a big-endian 24-bit twos-complement integer. + 3: two_comp_24 + # Value is a big-endian 32-bit twos-complement integer. + 4: two_comp_32 + # Value is a big-endian 48-bit twos-complement integer. + 5: two_comp_48 + # Value is a big-endian 64-bit twos-complement integer. + 6: two_comp_64 + # Value is a big-endian IEEE 754-2008 64-bit floating point number. 
+ 7: ieee754_64 + # Value is the integer 0. (Only available for schema format 4 and higher.) + 8: integer_0 + # Value is the integer 1. (Only available for schema format 4 and higher.) + 9: integer_1 + # Reserved for internal use. These serial type codes will never appear in a + # well-formed database file, but they might be used in transient and temporary + # database files that SQLite sometimes generates for its own use. The meanings + # of these codes can shift from one release of SQLite to the next. + 10: internal_1 + 11: internal_2 + # The serial types for blob and string are 'N >= 12 and even' and 'N >=13 and odd' respectively + # The enum here differs slightly to have a single value for blob and a value per text encoding + # for string. + # + # Value is a BLOB that is (N-12)/2 bytes in length. + 12: blob + # Value is a string in the text encoding and (N-13)/2 bytes in length. The nul terminator is + # not stored. + # Workaround for string encoding: + # Originally, sqlite3 has only one string type, + # and the string encoding is stored in _root.header.text_encoding. + 13: string_utf8 + 14: string_utf16_le + 15: string_utf16_be + # FIXME error: expected string or map, got 0 + #serial_type_size: + # 0: 0 + # 1: 1 + # 2: 2 + # 3: 3 + # 4: 4 + # 5: 6 + # 6: 8 + # 7: 8 + # 8: 0 + # 9: 0 + # # -1 means variable size + # 10: -1 # internal + # 11: -1 # internal + # # blob and string: size is stored in serial_type.len_blob_string + # 12: -1 # blob + # 13: -1 # string diff --git a/serialize_py/vlq_base128_be.ksy b/serialize_py/vlq_base128_be.ksy new file mode 100644 index 0000000..0e60067 --- /dev/null +++ b/serialize_py/vlq_base128_be.ksy @@ -0,0 +1,55 @@ +meta: + id: vlq_base128_be + title: Variable length quantity, unsigned integer, base128, big-endian + license: CC0-1.0 + ks-version: 0.7 +doc: | + A variable-length unsigned integer using base128 encoding. 1-byte groups + consist of 1-bit flag of continuation and 7-bit value chunk, and are ordered + "most significant group first", i.e. in "big-endian" manner. + + This particular encoding is specified and used in: + + * Standard MIDI file format + * ASN.1 BER encoding + * RAR 5.0 file format + + More information on this encoding is available at + https://en.wikipedia.org/wiki/Variable-length_quantity + + This particular implementation supports serialized values to up 8 bytes long. +-webide-representation: '{value:dec}' +seq: + - id: groups + type: group + repeat: until + repeat-until: not _.has_next +types: + group: + -webide-representation: '{value}' + doc: | + One byte group, clearly divided into 7-bit "value" chunk and 1-bit "continuation" flag. + seq: + - id: b + type: u1 + instances: + has_next: + value: (b & 0b1000_0000) != 0 + doc: If true, then we have more bytes to read + value: + value: b & 0b0111_1111 + doc: The 7-bit (base128) numeric value chunk of this group +instances: + last: + value: groups.size - 1 + value: + value: >- + groups[last].value + + (last >= 1 ? (groups[last - 1].value << 7) : 0) + + (last >= 2 ? (groups[last - 2].value << 14) : 0) + + (last >= 3 ? (groups[last - 3].value << 21) : 0) + + (last >= 4 ? (groups[last - 4].value << 28) : 0) + + (last >= 5 ? (groups[last - 5].value << 35) : 0) + + (last >= 6 ? (groups[last - 6].value << 42) : 0) + + (last >= 7 ? 
(groups[last - 7].value << 49) : 0) + doc: Resulting value as normal integer diff --git a/serialize_py/vlq_base128_be.py b/serialize_py/vlq_base128_be.py new file mode 100644 index 0000000..e7b55e2 --- /dev/null +++ b/serialize_py/vlq_base128_be.py @@ -0,0 +1,144 @@ +# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild +# type: ignore + +import kaitaistruct +from kaitaistruct import ReadWriteKaitaiStruct, KaitaiStream, BytesIO + + +if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 11): + raise Exception("Incompatible Kaitai Struct Python API: 0.11 or later is required, but you have %s" % (kaitaistruct.__version__)) + +class VlqBase128Be(ReadWriteKaitaiStruct): + """A variable-length unsigned integer using base128 encoding. 1-byte groups + consist of 1-bit flag of continuation and 7-bit value chunk, and are ordered + "most significant group first", i.e. in "big-endian" manner. + + This particular encoding is specified and used in: + + * Standard MIDI file format + * ASN.1 BER encoding + * RAR 5.0 file format + + More information on this encoding is available at + https://en.wikipedia.org/wiki/Variable-length_quantity + + This particular implementation supports serialized values to up 8 bytes long. + """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root if _root else self + + def _read(self): + self.groups = [] + i = 0 + while True: + _t_groups = VlqBase128Be.Group(self._io, self, self._root) + _t_groups._read() + _ = _t_groups + self.groups.append(_) + if not (_.has_next): + break + i += 1 + + + def _fetch_instances(self): + pass + for i in range(len(self.groups)): + pass + self.groups[i]._fetch_instances() + + + + def _write__seq(self, io=None): + super(VlqBase128Be, self)._write__seq(io) + for i in range(len(self.groups)): + pass + self.groups[i]._write__seq(self._io) + + + + def _check(self): + pass + if (len(self.groups) == 0): + raise kaitaistruct.ConsistencyError(u"groups", len(self.groups), 0) + for i in range(len(self.groups)): + pass + if self.groups[i]._root != self._root: + raise kaitaistruct.ConsistencyError(u"groups", self.groups[i]._root, self._root) + if self.groups[i]._parent != self: + raise kaitaistruct.ConsistencyError(u"groups", self.groups[i]._parent, self) + _ = self.groups[i] + if (not (_.has_next) != (i == (len(self.groups) - 1))): + raise kaitaistruct.ConsistencyError(u"groups", not (_.has_next), (i == (len(self.groups) - 1))) + + + class Group(ReadWriteKaitaiStruct): + """One byte group, clearly divided into 7-bit "value" chunk and 1-bit "continuation" flag. 
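+        For example, in the two-byte sequence 0x82 0x2c the first group
+        is (flag=1, value=2) and the second is (flag=0, value=44), so
+        the whole VLQ decodes to (2 << 7) | 44 = 300.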
+ """ + def __init__(self, _io=None, _parent=None, _root=None): + self._io = _io + self._parent = _parent + self._root = _root + + def _read(self): + self.b = self._io.read_u1() + + + def _fetch_instances(self): + pass + + + def _write__seq(self, io=None): + super(VlqBase128Be.Group, self)._write__seq(io) + self._io.write_u1(self.b) + + + def _check(self): + pass + + @property + def has_next(self): + """If true, then we have more bytes to read.""" + if hasattr(self, '_m_has_next'): + return self._m_has_next + + self._m_has_next = ((self.b & 128) != 0) + return getattr(self, '_m_has_next', None) + + def _invalidate_has_next(self): + del self._m_has_next + @property + def value(self): + """The 7-bit (base128) numeric value chunk of this group.""" + if hasattr(self, '_m_value'): + return self._m_value + + self._m_value = (self.b & 127) + return getattr(self, '_m_value', None) + + def _invalidate_value(self): + del self._m_value + + @property + def last(self): + if hasattr(self, '_m_last'): + return self._m_last + + self._m_last = (len(self.groups) - 1) + return getattr(self, '_m_last', None) + + def _invalidate_last(self): + del self._m_last + @property + def value(self): + """Resulting value as normal integer.""" + if hasattr(self, '_m_value'): + return self._m_value + + self._m_value = (((((((self.groups[self.last].value + ((self.groups[(self.last - 1)].value << 7) if (self.last >= 1) else 0)) + ((self.groups[(self.last - 2)].value << 14) if (self.last >= 2) else 0)) + ((self.groups[(self.last - 3)].value << 21) if (self.last >= 3) else 0)) + ((self.groups[(self.last - 4)].value << 28) if (self.last >= 4) else 0)) + ((self.groups[(self.last - 5)].value << 35) if (self.last >= 5) else 0)) + ((self.groups[(self.last - 6)].value << 42) if (self.last >= 6) else 0)) + ((self.groups[(self.last - 7)].value << 49) if (self.last >= 7) else 0)) + return getattr(self, '_m_value', None) + + def _invalidate_value(self): + del self._m_value + From 7eef294e5fd549f58c1caf9118123b5a6d4b52a7 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 31 Jul 2025 17:09:35 +0200 Subject: [PATCH 02/10] process only the database header --- serialize_py/codegen_result.py | 2 +- serialize_py/kaitai_serialize_codegen.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index b317cb5..ba1acba 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -2,7 +2,7 @@ import kaitaistruct import kaitaistruct_sqlite3 -root_size = 8192 +root_size = 100 def get_root(_io=None, check=True): if not _io: diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index a3ee65f..13e776a 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -166,8 +166,14 @@ def get_local_key(key, global_names): print(">", shlex.join(args)) subprocess.run(args) +database_header_size = 100 + +with open(good_database_path, "rb") as f: + good_database_header_bytes = f.read(database_header_size) + # create a database parser -root = kaitaistruct_sqlite3.Sqlite3.from_file(good_database_path) +# root = kaitaistruct_sqlite3.Sqlite3.from_file(good_database_path) +root = kaitaistruct_sqlite3.Sqlite3.from_bytes(good_database_header_bytes) # patch the internal cache attribute of root.pages # root._m_pages = PagesList(root) @@ -178,7 +184,7 @@ def get_local_key(key, global_names): # now, this will parse **only** the first page # fix: 'BtreePage' object has no 
attribute 'cell_pointers' -if 1: +if 0: # print("root.pages[0] keys:", get_keys(root.pages[0])) # print("root.pages[0] seq:", get_seq(root.pages[0])) # FIXME kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 116: validation failed: not equal, @@ -192,7 +198,7 @@ def get_local_key(key, global_names): print_value("root.pages[0].num_cells") print_value("root.pages[0].cell_pointers[0]") print_value("root.pages[0].cell_pointers[0].ofs_content") -if 1: +if 0: # print("root.pages[1] keys:", get_keys(root.pages[1])) # print("root.pages[1] seq:", get_seq(root.pages[1])) print("root.pages[1]._read()"); root.pages[1]._read() @@ -452,7 +458,8 @@ def codegen( args = [ "diff", "--color=always", "-u", "<(", "xxd", codegen_database_path, ")", # red - "<(", "xxd", good_database_path, ")", # green + # "<(", "xxd", good_database_path, ")", # green + f"<( head -c{database_header_size} {good_database_path} | xxd )", # green "|", "head", "-n100", ] args = ["bash", "-c", " ".join(args)] From 144889d78d9e76d0cf69233e0fc5f2903eda4971 Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Thu, 31 Jul 2025 17:13:38 +0200 Subject: [PATCH 03/10] try to fix root._write --- serialize_py/codegen_result.py | 15 ++++++++++----- serialize_py/kaitai_serialize_codegen.py | 19 ++++++++++++------- serialize_py/kaitai_serialize_manual.py | 20 ++++++++++++++++---- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index ba1acba..5ff04f1 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -8,6 +8,9 @@ def get_root(_io=None, check=True): if not _io: _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size))) root = kaitaistruct_sqlite3.Sqlite3(_io) + # try to fix root._write + # https://github.com/kaitai-io/kaitai_struct/issues/1245 + root.pages__to_write = False root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(root._io, root, root._root) header = root.header def init_header(header): @@ -46,11 +49,13 @@ def init_first_freelist_trunk_page(first_freelist_trunk_page): def get_io(): root = get_root() _io = root._io - # no. _write calls _fetch_instances which throws - # root._write(_io) - root._write__seq(_io) - # root._fetch_instances() # this would throw - root._io.write_back_child_streams() + if 1: + # no. 
_write calls _fetch_instances which throws + root._write(_io) + else: + root._write__seq(_io) + # root._fetch_instances() # this would throw + root._io.write_back_child_streams() return _io def get_bytes(): diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index 13e776a..0865d4a 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -38,7 +38,6 @@ def __getitem__(self, i): # i is 0-based ), f"page index is out of range: {i} is not in (0, {header.num_pages - 1})" # todo: maybe cache page # equality test: page_a.page_number == page_b.page_number - # FIXME handle root page: i == 0 _pos = root._io.pos() if i == 0: # The first 100 bytes of the database file comprise the database file header @@ -176,7 +175,7 @@ def get_local_key(key, global_names): root = kaitaistruct_sqlite3.Sqlite3.from_bytes(good_database_header_bytes) # patch the internal cache attribute of root.pages -# root._m_pages = PagesList(root) +root._m_pages = PagesList(root) root._read() @@ -309,6 +308,10 @@ def codegen( print(f"{ind}{ids}if not _io:", file=out) print(f"{ind}{ids}{ids}_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))", file=out) print(f"{ind}{ids}{on} = {mod}.{member}(_io)", file=out) + # TODO remove. this works only for sqlite3.ksy + print(f"{ind}{ids}# try to fix root._write", file=out) + print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out) + print(f"{ind}{ids}{on}.pages__to_write = False", file=out) # else: # print(f"{ind}{ids}# non-root init", file=out) # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) @@ -423,11 +426,13 @@ def codegen( print(f"{ind}{ids}root = get_root()", file=out) print(f"{ind}{ids}_io = root._io", file=out) # print(f"{ind}{ids}_io.seek(0)", file=out) - print(f"{ind}{ids}# no. _write calls _fetch_instances which throws", file=out) - print(f"{ind}{ids}# root._write(_io)", file=out) - print(f"{ind}{ids}root._write__seq(_io)", file=out) - print(f"{ind}{ids}# root._fetch_instances() # this would throw", file=out) - print(f"{ind}{ids}root._io.write_back_child_streams()", file=out) + print(f"{ind}{ids}if 1:", file=out) + print(f"{ind}{ids}{ids}# no. 
_write calls _fetch_instances which throws", file=out) + print(f"{ind}{ids}{ids}root._write(_io)", file=out) + print(f"{ind}{ids}else:", file=out) + print(f"{ind}{ids}{ids}root._write__seq(_io)", file=out) + print(f"{ind}{ids}{ids}# root._fetch_instances() # this would throw", file=out) + print(f"{ind}{ids}{ids}root._io.write_back_child_streams()", file=out) print(f"{ind}{ids}return _io", file=out) print("", file=out) print(f"{ind}def get_bytes():", file=out) diff --git a/serialize_py/kaitai_serialize_manual.py b/serialize_py/kaitai_serialize_manual.py index 6253adc..02f1209 100755 --- a/serialize_py/kaitai_serialize_manual.py +++ b/serialize_py/kaitai_serialize_manual.py @@ -75,7 +75,7 @@ def write(on): # FIXME add extra space for database header _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(num_pages * page_size + 100))) -if 0: +if 1: root = kaitaistruct_sqlite3.Sqlite3(_io) else: class PatchedSqlite3(kaitaistruct_sqlite3.Sqlite3): @@ -167,6 +167,11 @@ def _write(self, io=None): root.pages = [] # page = set_value("root.header.root_page", root.BtreePage(page_number)) page = root.BtreePage(page_number) + +# try to fix root._write +# https://github.com/kaitai-io/kaitai_struct/issues/1245 +page.cell_content_area__to_write = False + root.pages.append(page) page.database_header = root.header page._root = root @@ -362,11 +367,18 @@ def _write(self, io=None): +# try to fix root._write +# https://github.com/kaitai-io/kaitai_struct/issues/1245 +root.pages__to_write = False + _io.seek(0) # fix: _write__seq does not seek before writing -# no. _write calls _fetch_instances which throws -# print(f"writing root"); root._write(_io) -print(f"writing root"); root._write__seq(_io); root._io.write_back_child_streams() +if 1: + # no. _write calls _fetch_instances which throws + # https://github.com/kaitai-io/kaitai_struct/issues/1245 + print(f"writing root"); root._write(_io) +else: + print(f"writing root"); root._write__seq(_io); root._io.write_back_child_streams() print("writing done") From 94e2fdc8b4f2db22b29da131848af7be7399fbcd Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Fri, 1 Aug 2025 07:01:34 +0200 Subject: [PATCH 04/10] remove unnecessary binding --- serialize_py/codegen_result.py | 6 ++---- serialize_py/kaitai_serialize_codegen.py | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index 5ff04f1..2d4f7f3 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -12,7 +12,6 @@ def get_root(_io=None, check=True): # https://github.com/kaitai-io/kaitai_struct/issues/1245 root.pages__to_write = False root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(root._io, root, root._root) - header = root.header def init_header(header): header.magic = b'SQLite format 3\x00' header.page_size_raw = 4096 # 0x1000 @@ -25,10 +24,9 @@ def init_header(header): header.file_change_counter = 1 header.num_pages = 2 header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(root._io, header, header._root) - first_freelist_trunk_page = header.first_freelist_trunk_page def init_first_freelist_trunk_page(first_freelist_trunk_page): first_freelist_trunk_page.page_number = 0 - init_first_freelist_trunk_page(first_freelist_trunk_page) + init_first_freelist_trunk_page(header.first_freelist_trunk_page) header.num_freelist_pages = 0 header.schema_cookie = 1 header.schema_format = 4 @@ -41,7 +39,7 @@ def init_first_freelist_trunk_page(first_freelist_trunk_page): 
header.reserved_header_bytes = 20 * b'\x00' header.version_valid_for = 1 header.sqlite_version_number = 3050001 # 0x2e8a11 - init_header(header) + init_header(root.header) if check: root._check() return root diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index 0865d4a..7d1f417 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -383,7 +383,6 @@ def codegen( print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long # avoid shadowing global variables local_key = get_local_key(key, global_names) - print(f"{ind}{ids}{local_key} = {on}.{key}", file=out) # print(f"{ind}{ids}if 1:", file=out) # no block scope # print(f"{ind}{ids}if {local_key} := {on}.{key}:", file=out) # no block scope # TypeError: 'int' object does not support the context manager protocol @@ -406,7 +405,7 @@ def codegen( module_map, global_names, ) - print(f"{ind}{ids}init_{key}({local_key})", file=out) # "init_" prefix + print(f"{ind}{ids}init_{key}({on}.{key})", file=out) # "init_" prefix # print(f"{ind}{ids}{key}_init({local_key})", file=out) # "_init" suffix # some user-defined types need this From 63fb687f43426f618597740b77c4c93c8e3396db Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Sun, 3 Aug 2025 20:52:31 +0200 Subject: [PATCH 05/10] add parse_page_by_page --- serialize_py/codegen_result.py | 2 +- serialize_py/kaitai_serialize_codegen.py | 451 ++++++++++++++++------- 2 files changed, 310 insertions(+), 143 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index 2d4f7f3..96139c5 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -2,7 +2,7 @@ import kaitaistruct import kaitaistruct_sqlite3 -root_size = 100 +root_size = 8192 def get_root(_io=None, check=True): if not _io: diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index 7d1f417..0507253 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -6,6 +6,10 @@ good_database_path = "test_kaitai.py.good.db" codegen_database_path = "test_kaitai.py.codegen.db" +# this is useful for large databases (larger than RAM) +# parse_page_by_page = True +parse_page_by_page = False + import io import re import os @@ -156,121 +160,76 @@ def get_local_key(key, global_names): num += 1 return local_key -if not os.path.exists(good_database_path): - args = [ - "sqlite3", - good_database_path, - "create table test (id INTEGER)", - ] - print(">", shlex.join(args)) - subprocess.run(args) - -database_header_size = 100 - -with open(good_database_path, "rb") as f: - good_database_header_bytes = f.read(database_header_size) - -# create a database parser -# root = kaitaistruct_sqlite3.Sqlite3.from_file(good_database_path) -root = kaitaistruct_sqlite3.Sqlite3.from_bytes(good_database_header_bytes) - -# patch the internal cache attribute of root.pages -root._m_pages = PagesList(root) - -root._read() - -# print("root.header.magic", root.header.magic) -# now, this will parse **only** the first page -# fix: 'BtreePage' object has no attribute 'cell_pointers' - -if 0: - # print("root.pages[0] keys:", get_keys(root.pages[0])) - # print("root.pages[0] seq:", get_seq(root.pages[0])) - # FIXME kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 116: validation failed: not equal, - # expected b'SQLite format 3\x00', but got 
b'\r\x00\x00\x00\x01\x0f\xcc\x00\x0f\xcc\x00\x00\x00\x00\x00\x00' - print("root.pages[0]._read()"); root.pages[0]._read() - # print("root.pages[0] keys:", get_keys(root.pages[0])) - # print("root.pages[0] seq:", get_seq(root.pages[0])) - print_value("root.pages[0]") - # FIXME root.pages[0].page_type = error: 'BtreePage' object has no attribute 'page_type' - print_value("root.pages[0].page_type") - print_value("root.pages[0].num_cells") - print_value("root.pages[0].cell_pointers[0]") - print_value("root.pages[0].cell_pointers[0].ofs_content") -if 0: - # print("root.pages[1] keys:", get_keys(root.pages[1])) - # print("root.pages[1] seq:", get_seq(root.pages[1])) - print("root.pages[1]._read()"); root.pages[1]._read() - # print("root.pages[1] keys:", get_keys(root.pages[1])) - print("root.pages[1] seq:", get_seq(root.pages[1])) - print_value("root.pages[1]") - print_value("root.pages[1].page_type") - print_value("root.pages[1].num_cells") - # FIXME error: 'BtreePage' object has no attribute 'cell_pointers' - print_value("root.pages[1].cell_pointers[0]") - print_value("root.pages[1].cell_pointers[0].ofs_content") -# print(cell.content, dir(cell.content)) - -r""" -res = io.StringIO() -on_root = "root" -on = on_root -on_parent = on_root -root_class_name = root.__class__.__name__ -""" - - -r""" -mod = root.__class__.__module__ # "kaitaistruct_sqlite3" -mod = root_class_name # "Sqlite3" -mod = module_map.get(mod, mod) -print(f"{on_root} = {mod}.{root_class_name}()", file=res) -print_value("inspect.getsourcelines(root._read)") -print_value("inspect.getclosurevars(root._read)") -print_value("inspect.unwrap(root._read)") -print_value("inspect.get_annotations(root._read)") -""" - -r''' -lines, firstlineno = inspect.getsourcelines(root._read) -lines.pop(0) # "def _read(self):" -for line in lines: - line = line.rstrip() - print("line", line) - m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) - if m: - print("m", m.groups()) - key, mod, member = m.groups() - mod = module_map.get(mod, mod) - # https://doc.kaitai.io/serialization.html#_user_defined_types +def get_class_qualname(mod_name, class_name): + mod_class_qualname_list = get_mod_class_qualname_list(mod_name) + # TODO + raise 123 + +import functools + +@functools.lru_cache(maxsize=100) +def get_mod_class_qualname_list(mod_name): + """ + example source: + + class A # depth = 0 # A + class B # 1 # A.B + class C # 2 # A.B.C + class D # 2 # A.B.D + class E # 0 # E + """ + # print("get_qualname", mod_name, class_name) + mod = sys.modules[mod_name] + lines, firstlineno = inspect.getsourcelines(mod) + lines.pop(0) # "def _read(self):" + seq = [] + # class_name_tree = dict() + class_qualname_list = list() + prev_depth = 0 + prev_class_qualname = [] + for line in lines: + m = re.match(r"(\s*)class (\w+)\(([\w, ]+)\):", line) + if not m: continue + indent, name, args = m.groups() + depth = len(indent) // 4 """ - if on == "root": - print(f"{on}.{key} = {mod}.{member}()", file=res) - else: - print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) + if depth == 0: + # root class + # class_name_tree[name] = dict() + class_qualname = [name] + class_qualname_list.append(class_qualname) + prev_class_qualname = class_qualname + continue + assert depth > 0 """ - print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) -''' + if depth > prev_depth: + assert depth == prev_depth + 1 + """ + if depth > prev_depth: + assert depth == prev_depth + 1 + class_qualname = 
prev_class_qualname + [name] + elif depth == prev_depth: + class_qualname = prev_class_qualname[:-1] + [name] + elif depth < prev_depth: + class_qualname = prev_class_qualname[:depth] + [name] + """ + class_qualname = prev_class_qualname[:depth] + [name] + class_qualname_list.append(class_qualname) + prev_class_qualname = class_qualname -val = None + # class Sqlite3(ReadWriteKaitaiStruct): + # class FormatVersion(IntEnum): -on_root = "root" -on = on_root -on_parent = on_root -root_class_name = root.__class__.__name__ -module_map = { - "Sqlite3": "kaitaistruct_sqlite3", -} -mod = root.__class__.__module__ # "kaitaistruct_sqlite3" -mod = root_class_name # "Sqlite3" -mod = module_map.get(mod, mod) def codegen( obj, out, on="root", on_parent=None, + # FIXME rename root to codegen_root + # this can be different than the actual parser "root" class root=None, + root_name="root", indent_step=4*" ", indent_level=0, enum_map_map={}, @@ -279,7 +238,13 @@ def codegen( ): global val # fix print_value mod = obj.__class__.__module__ - member = obj.__class__.__name__ + # member = obj.__class__.__name__ # DatabaseHeader + member = obj.__class__.__qualname__ # Sqlite3.DatabaseHeader + """ + print("obj.__class__.__module__", obj.__class__.__module__) + print("obj.__class__.__name__", obj.__class__.__name__) + print("obj.__class__.__qualname__", obj.__class__.__qualname__) + """ is_root = True if on_parent == None else False if is_root: root = obj @@ -292,7 +257,7 @@ def codegen( global_names.append(f"local_{key}_2") global_names.append(mod) # TODO add imports of dependencies. example: vlq_base128_be for sqlite3 - root_cln = root.__class__.__name__ + # root_cln = root.__class__.__qualname__ ind = indent_level * indent_step ids = indent_step if is_root: @@ -304,7 +269,7 @@ def codegen( print("", file=out) print(f"{ind}root_size = {root._io._size}", file=out) print("", file=out) - print(f"{ind}def get_root(_io=None, check=True):", file=out) + print(f"{ind}def get_{root_name}(_io=None, check=True):", file=out) print(f"{ind}{ids}if not _io:", file=out) print(f"{ind}{ids}{ids}_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))", file=out) print(f"{ind}{ids}{on} = {mod}.{member}(_io)", file=out) @@ -312,6 +277,7 @@ def codegen( print(f"{ind}{ids}# try to fix root._write", file=out) print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out) print(f"{ind}{ids}{on}.pages__to_write = False", file=out) + # root.pages__to_write = True # else: # print(f"{ind}{ids}# non-root init", file=out) # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) @@ -322,11 +288,13 @@ def codegen( print("key", repr(key)) print("val", repr(val), dir(val)) print_value("val.__class__.__module__") - print_value("val.__class__.__name__") + print_value("val.__class__.__qualname__") """ # obj.__class__.__module__ == 'builtins' + # TODO rename to "mod_name" mod = val.__class__.__module__ - member = val.__class__.__name__ + # TODO rename to "member_name" + member = val.__class__.__qualname__ # builtin types: int, bytes, ... 
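        # (illustration of how the "builtins" dispatch below behaves;
        #  checkable in a REPL: values parsed as Python primitives report
        #  __class__.__module__ == "builtins", generated Kaitai types
        #  report their defining module)
        # >>> (42).__class__.__module__
        # 'builtins'
        # >>> b"x".__class__.__module__
        # 'builtins'
        # >>> kaitaistruct_sqlite3.Sqlite3.DatabaseHeader.__module__
        # 'kaitaistruct_sqlite3'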
if mod == "builtins": @@ -368,7 +336,14 @@ def codegen( val_str = str(val) if val > 10: val_str += f" = {hex(val)}" - print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{enum_name}.{enum_key} # {val_str}", file=out) + # enum_qualname = get_class_qualname(mod, enum_name) + # this assumes that all enum classes are direct children of the parser_root class + # FIXME rename root to codegen_root != parser_root + parser_root_class_name = root.__class__.__qualname__.split(".")[0] + enum_qualname = f"{parser_root_class_name}.{enum_name}" + # print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{enum_name}.{enum_key} # {val_str}", file=out) + # print(f"{ind}{ids}{on}.{key} = {mod}.{enum_name}.{enum_key} # {val_str}", file=out) + print(f"{ind}{ids}{on}.{key} = {mod}.{enum_qualname}.{enum_key} # {val_str}", file=out) continue # TODO handle list types @@ -380,7 +355,8 @@ def codegen( # user-defined types # https://doc.kaitai.io/serialization.html#_user_defined_types # print(f"{ind}{ids}{on}.{key} = root.{member}(root._io, {on}, {on}._root)", file=out) # short - print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long + # print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long + print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long # avoid shadowing global variables local_key = get_local_key(key, global_names) # print(f"{ind}{ids}if 1:", file=out) # no block scope @@ -399,6 +375,7 @@ def codegen( local_key, on, root, + root_name, indent_step, (indent_level + 1), enum_map_map, @@ -422,16 +399,16 @@ def codegen( if is_root: print("", file=out) print(f"{ind}def get_io():", file=out) - print(f"{ind}{ids}root = get_root()", file=out) - print(f"{ind}{ids}_io = root._io", file=out) + print(f"{ind}{ids}{root_name} = get_{root_name}()", file=out) + print(f"{ind}{ids}_io = {root_name}._io", file=out) # print(f"{ind}{ids}_io.seek(0)", file=out) print(f"{ind}{ids}if 1:", file=out) print(f"{ind}{ids}{ids}# no. 
_write calls _fetch_instances which throws", file=out) - print(f"{ind}{ids}{ids}root._write(_io)", file=out) + print(f"{ind}{ids}{ids}{root_name}._write(_io)", file=out) print(f"{ind}{ids}else:", file=out) - print(f"{ind}{ids}{ids}root._write__seq(_io)", file=out) - print(f"{ind}{ids}{ids}# root._fetch_instances() # this would throw", file=out) - print(f"{ind}{ids}{ids}root._io.write_back_child_streams()", file=out) + print(f"{ind}{ids}{ids}{root_name}._write__seq(_io)", file=out) + print(f"{ind}{ids}{ids}# {root_name}._fetch_instances() # this would throw", file=out) + print(f"{ind}{ids}{ids}{root_name}._io.write_back_child_streams()", file=out) print(f"{ind}{ids}return _io", file=out) print("", file=out) print(f"{ind}def get_bytes():", file=out) @@ -439,33 +416,223 @@ def codegen( print(f"{ind}{ids}_io.seek(0)", file=out) print(f"{ind}{ids}return _io.read_bytes_full()", file=out) -out = io.StringIO() -# rename imports -module_map = { - "Sqlite3": "kaitaistruct_sqlite3", -} -codegen(root, out, module_map=module_map) +if not os.path.exists(good_database_path): + args = [ + "sqlite3", + good_database_path, + "create table test (id INTEGER)", + ] + print(">", shlex.join(args)) + subprocess.run(args) -print("codegen result:") -print(out.getvalue()) -with open("codegen_result.py", "w") as f: - f.write(out.getvalue()) +database_header_size = 100 -import codegen_result -codegen_bytes = codegen_result.get_bytes() -if codegen_bytes == len(codegen_bytes) * b"\x00": - raise Exception("codegen_bytes are only null bytes") -with open(codegen_database_path, "wb") as f: - f.write(codegen_bytes) +database_size = os.path.getsize(good_database_path) +print(f"database_size {database_size}") -# TODO rewrite diff in python +# database_page_size = 4096 # default args = [ - "diff", "--color=always", "-u", - "<(", "xxd", codegen_database_path, ")", # red - # "<(", "xxd", good_database_path, ")", # green - f"<( head -c{database_header_size} {good_database_path} | xxd )", # green - "|", "head", "-n100", + "sqlite3", + good_database_path, + "pragma page_size", ] -args = ["bash", "-c", " ".join(args)] -print(">", shlex.join(args)) -subprocess.run(args) +database_page_size = int(subprocess.check_output(args, text=True)) +print(f"database_page_size {database_page_size}") + +with open(good_database_path, "rb") as f: + good_database_header_bytes = f.read(database_header_size) + +assert database_size % database_page_size == 0 + +database_num_pages = database_size // database_page_size +print(f"database_num_pages {database_num_pages}") + + + + +# print(cell.content, dir(cell.content)) + +r""" +res = io.StringIO() +on_root = "root" +on = on_root +on_parent = on_root +root_class_name = root.__class__.__qualname__ +""" + + +r""" +mod = root.__class__.__module__ # "kaitaistruct_sqlite3" +mod = root_class_name # "Sqlite3" +mod = module_map.get(mod, mod) +print(f"{on_root} = {mod}.{root_class_name}()", file=res) +print_value("inspect.getsourcelines(root._read)") +print_value("inspect.getclosurevars(root._read)") +print_value("inspect.unwrap(root._read)") +print_value("inspect.get_annotations(root._read)") +""" + +r''' +lines, firstlineno = inspect.getsourcelines(root._read) +lines.pop(0) # "def _read(self):" +for line in lines: + line = line.rstrip() + print("line", line) + m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) + if m: + print("m", m.groups()) + key, mod, member = m.groups() + mod = module_map.get(mod, mod) + # https://doc.kaitai.io/serialization.html#_user_defined_types + """ + if 
on == "root": + print(f"{on}.{key} = {mod}.{member}()", file=res) + else: + print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) + """ + print(f"{on}.{key} = {mod}.{member}(None, {on_parent}, {on_parent}._root)", file=res) +''' + + + +if not parse_page_by_page: + parse_chunk_list = [( + 0, + database_size, + kaitaistruct_sqlite3.Sqlite3, + None, + "root", + )] +else: + parse_chunk_list = [( + 0, + 100, + kaitaistruct_sqlite3.Sqlite3.DatabaseHeader, + None, + "header", + )] + for page_idx in range(database_num_pages): + page_num = page_idx + 1 + if page_idx == 0: + parse_chunk_list += [( + 100, + (database_page_size - 100), + kaitaistruct_sqlite3.Sqlite3.BtreePage, + (page_num,), + f"page_{page_num}", + )] + else: + parse_chunk_list += [( + page_idx * database_page_size, + (page_idx + 1) * database_page_size, + kaitaistruct_sqlite3.Sqlite3.BtreePage, + (page_num,), + f"page_{page_num}", + )] + +# TODO? +output_file = open("test_kaitai.py.codegen.db", "wb") +# TODO output_file.write(...) + +# rename imports +module_map = { + "Sqlite3": "kaitaistruct_sqlite3", +} + +# out = io.StringIO() +# codegen(root, out, module_map=module_map) + +for parse_chunk in parse_chunk_list: + + ( + parse_chunk_offset, + parse_chunk_size, + parse_chunk_parser, + parse_chunk_parser_args, + parse_chunk_root_name, + ) = parse_chunk + + if not parse_chunk_parser_args: + parse_chunk_parser_args = tuple() + + # create a database parser + # FIXME pass parse_chunk_parser_args + """ + if parse_page_by_page: + with open(good_database_path, "rb") as f: + f.seek(parse_chunk_offset) + _bytes = f.read(parse_chunk_size) + # root = kaitaistruct_sqlite3.Sqlite3.from_bytes(good_database_header_bytes) + root = parse_chunk_parser.from_bytes(_bytes) + else: + # root = kaitaistruct_sqlite3.Sqlite3.from_file(good_database_path) + root = parse_chunk_parser.from_file(good_database_path) + """ + + bytes_io = io.BytesIO() # store all input bytes in RAM + with open(good_database_path, "rb") as f: + f.seek(parse_chunk_offset) + # _bytes = f.read(parse_chunk_size) + bytes_io.write(f.read(parse_chunk_size)) + bytes_io.seek(0) + _io = kaitaistruct.KaitaiStream(bytes_io) + + print(f"calling parser {parse_chunk_parser.__module__}.{parse_chunk_parser.__qualname__}") + # FIXME rename to codegen_root + root = parse_chunk_parser(*parse_chunk_parser_args, _io=_io) + + # TODO remove? 
+ if parse_page_by_page: + # patch the internal cache attribute of root.pages + root._m_pages = PagesList(root) + + root._read() + + # debug: print some values + if 0: + # print("root.header.magic", root.header.magic) + # now, this will parse **only** the first page + # fix: 'BtreePage' object has no attribute 'cell_pointers' + if not parse_page_by_page: + for page_idx in range(database_num_pages): + # print(f"root.pages[{page_idx}] keys:", get_keys(root.pages[page_idx])) + # print(f"root.pages[{page_idx}] seq:", get_seq(root.pages[page_idx])) + # FIXME kaitaistruct.ValidationNotEqualError: /types/database_header/seq/0: at pos 116: validation failed: not equal, + # expected b'SQLite format 3\x00', but got b'\r\x00\x00\x00\x01\x0f\xcc\x00\x0f\xcc\x00\x00\x00\x00\x00\x00' + print(f"root.pages[{page_idx}]._read()"); root.pages[page_idx]._read() + # print(f"root.pages[{page_idx}] keys:", get_keys(root.pages[page_idx])) + # print(f"root.pages[{page_idx}] seq:", get_seq(root.pages[page_idx])) + print_value(f"root.pages[{page_idx}]") + # FIXME root.pages[{page_idx}].page_type = error: 'BtreePage' object has no attribute 'page_type' + print_value(f"root.pages[{page_idx}].page_type") + print_value(f"root.pages[{page_idx}].num_cells") + for cell_idx in range(1): + print_value(f"root.pages[{page_idx}].cell_pointers[{cell_idx}]") + print_value(f"root.pages[{page_idx}].cell_pointers[{cell_idx}].ofs_content") + + out = io.StringIO() # store all output code in RAM + codegen(root, out, module_map=module_map, root_name=parse_chunk_root_name) + + print("codegen result:") + print(out.getvalue()) + with open("codegen_result.py", "w") as f: + f.write(out.getvalue()) + + import codegen_result + codegen_bytes = codegen_result.get_bytes() + if codegen_bytes == len(codegen_bytes) * b"\x00": + raise Exception("codegen_bytes are only null bytes") + with open(codegen_database_path, "wb") as f: + f.write(codegen_bytes) + + # TODO rewrite diff in python + args = [ + "diff", "--color=always", "-u", + # TODO seek? + "<(", "xxd", codegen_database_path, ")", # red + f"<( head -c{parse_chunk_offset + parse_chunk_size} {good_database_path} | tail -c{parse_chunk_size} | xxd )", # green + "|", "head", "-n100", + ] + args = ["bash", "-c", " ".join(args)] + print(">", shlex.join(args)) + subprocess.run(args) From a51799740da3a590846333203834867a943ef27f Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Sun, 3 Aug 2025 23:03:51 +0200 Subject: [PATCH 06/10] sqlite3.ksy: fix btree_page.cell_content_area --- serialize_py/kaitaistruct_sqlite3.py | 8 ++++++-- serialize_py/sqlite3.ksy | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/serialize_py/kaitaistruct_sqlite3.py b/serialize_py/kaitaistruct_sqlite3.py index 12cf991..60919bb 100644 --- a/serialize_py/kaitaistruct_sqlite3.py +++ b/serialize_py/kaitaistruct_sqlite3.py @@ -338,13 +338,17 @@ def _invalidate_ofs_cell_content_area(self): del self._m_ofs_cell_content_area @property def cell_content_area(self): + """We parse the first page separate from the 100 byte database header, + so for the first page, we have to subtract 100 from the offset, + to make the offset relative to our "page". 
+ """ if self._should_write_cell_content_area: self._write_cell_content_area() if hasattr(self, '_m_cell_content_area'): return self._m_cell_content_area _pos = self._io.pos() - self._io.seek(self.ofs_cell_content_area) + self._io.seek(((self.ofs_cell_content_area - 100) if (self.page_number == 1) else self.ofs_cell_content_area)) self._m_cell_content_area = self._io.read_bytes((self._root.header.usable_size - self.ofs_cell_content_area)) self._io.seek(_pos) return getattr(self, '_m_cell_content_area', None) @@ -356,7 +360,7 @@ def cell_content_area(self, v): def _write_cell_content_area(self): self._should_write_cell_content_area = False _pos = self._io.pos() - self._io.seek(self.ofs_cell_content_area) + self._io.seek(((self.ofs_cell_content_area - 100) if (self.page_number == 1) else self.ofs_cell_content_area)) self._io.write_bytes(self.cell_content_area) self._io.seek(_pos) diff --git a/serialize_py/sqlite3.ksy b/serialize_py/sqlite3.ksy index 664b0fb..b83a0e3 100644 --- a/serialize_py/sqlite3.ksy +++ b/serialize_py/sqlite3.ksy @@ -371,6 +371,7 @@ types: doc: | The start of the cell content area. A zero value for this integer is interpreted as 65536. The interpreted value is available as `cell_content_area`. + The offset is relative to the current page. - id: num_frag_free_bytes type: u1 doc: The number of fragmented free bytes within the cell content area. @@ -388,8 +389,13 @@ types: ofs_cell_content_area: value: 'ofs_cell_content_area_raw == 0 ? 65536 : ofs_cell_content_area_raw' cell_content_area: - pos: ofs_cell_content_area + # pos: ofs_cell_content_area + pos: 'page_number == 1 ? (ofs_cell_content_area - 100) : ofs_cell_content_area' size: _root.header.usable_size - ofs_cell_content_area + doc: | + We parse the first page separate from the 100 byte database header, + so for the first page, we have to subtract 100 from the offset, + to make the offset relative to our "page". 
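      # worked example, using the numbers from this series' test database
      # (page_size = usable_size = 4096, reserved space 0):
      #   page 1: ofs_cell_content_area = 4044 -> size = 4096 - 4044 = 52,
      #   seek target = 4044 - 100 = 3944 inside page 1's 3996-byte substream
      #   page 2+: seek to ofs_cell_content_area unchanged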
reserved_space: pos: _root.header.page_size - _root.header.page_reserved_space_size size-eos: true From 289e8c30e2ed3e3dcaf2b5a846556063334c4adc Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Sun, 3 Aug 2025 23:05:07 +0200 Subject: [PATCH 07/10] also handle positional instances --- serialize_py/codegen_result.py | 37 ++++- serialize_py/kaitai_serialize_codegen.py | 178 ++++++++++++++++++++--- 2 files changed, 188 insertions(+), 27 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index 96139c5..ffd397d 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -7,11 +7,8 @@ def get_root(_io=None, check=True): if not _io: _io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size))) - root = kaitaistruct_sqlite3.Sqlite3(_io) - # try to fix root._write - # https://github.com/kaitai-io/kaitai_struct/issues/1245 - root.pages__to_write = False - root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(root._io, root, root._root) + root = kaitaistruct_sqlite3.Sqlite3(_io=_io, _parent=None, _root=None) + root.header = kaitaistruct_sqlite3.Sqlite3.DatabaseHeader(_io=root._io, _parent=root, _root=root._root) def init_header(header): header.magic = b'SQLite format 3\x00' header.page_size_raw = 4096 # 0x1000 @@ -23,9 +20,10 @@ def init_header(header): header.leaf_payload_fraction = 32 # 0x20 header.file_change_counter = 1 header.num_pages = 2 - header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(root._io, header, header._root) + header.first_freelist_trunk_page = kaitaistruct_sqlite3.Sqlite3.FreelistTrunkPagePointer(_io=root._io, _parent=header, _root=header._root) def init_first_freelist_trunk_page(first_freelist_trunk_page): first_freelist_trunk_page.page_number = 0 + first_freelist_trunk_page.page = None init_first_freelist_trunk_page(header.first_freelist_trunk_page) header.num_freelist_pages = 0 header.schema_cookie = 1 @@ -40,6 +38,33 @@ def init_first_freelist_trunk_page(first_freelist_trunk_page): header.version_valid_for = 1 header.sqlite_version_number = 3050001 # 0x2e8a11 init_header(root.header) + root.pages = [] + root.pages.append(kaitaistruct_sqlite3.Sqlite3.BtreePage(page_number=1, _io=root._io, _parent=root, _root=root._root)) + def init_page(page): + page.page_type = kaitaistruct_sqlite3.Sqlite3.BtreePageType.table_leaf_page # 13 = 0xd + page.first_freeblock = 0 + page.num_cells = 1 + page.ofs_cell_content_area_raw = 4044 # 0xfcc + page.num_frag_free_bytes = 0 + page.cell_pointers = [] + page.cell_pointers.append(kaitaistruct_sqlite3.Sqlite3.CellPointer(_io=root._io, _parent=page, _root=page._root)) + def init_cell_pointer(cell_pointer): + cell_pointer.ofs_content = 4044 # 0xfcc + init_cell_pointer(page.cell_pointers[0]) + page.cell_content_area = b'2\x01\x06\x17\x15\x15\x01Itabletesttest\x02CREATE TABLE test (id INTEGER)' + page.reserved_space = None + init_page(root.pages[0]) + root.pages.append(kaitaistruct_sqlite3.Sqlite3.BtreePage(page_number=2, _io=root._io, _parent=root, _root=root._root)) + def init_page(page): + page.page_type = kaitaistruct_sqlite3.Sqlite3.BtreePageType.table_leaf_page # 13 = 0xd + page.first_freeblock = 0 + page.num_cells = 0 + page.ofs_cell_content_area_raw = 4096 # 0x1000 + page.num_frag_free_bytes = 0 + page.cell_pointers = [] + page.cell_content_area = b'' + page.reserved_space = None + init_page(root.pages[1]) if check: root._check() return root diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py 
index 0507253..f9ae46d 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -88,7 +88,9 @@ def get_keys(obj): def f(k): if k[0] == "_": return False if "A" <= k[0] <= "Z": return False - if k in ("close", "from_bytes", "from_file", "from_io", "pages__to_write"): return False + if k in ("close", "from_bytes", "from_file", "from_io"): return False + # https://doc.kaitai.io/user_guide.html#_instances_data_beyond_the_sequence + if k.endswith("__to_write"): return False return True keys = list(filter(f, keys)) return keys @@ -133,6 +135,24 @@ def get_seq(obj): continue return seq +def get_instances(obj): + # TODO upstream: this should be simpler + if not hasattr(obj, "_fetch_instances"): + return [] + _fetch_instances = getattr(obj, "_fetch_instances") + lines, firstlineno = inspect.getsourcelines(_fetch_instances) + lines.pop(0) # "def _fetch_instances(self):" + instances = [] + for line in lines: + line = line.rstrip() + # print("line", line) + # line: _ = self.pages + m = re.match(r"\s+_ = self\.(\w+)", line) + if m: + instances.append(m[1]) + continue + return instances + def parse_enum_map(lines): enum_map = dict() line0 = lines.pop(0) @@ -150,6 +170,8 @@ def parse_enum_map(lines): return enum_map def get_local_key(key, global_names): + # # handle array item keys like "some_array[123]" + # key = key.replace("[", "_").replace("]", "_") num = 1 local_key = key while local_key in global_names: @@ -221,6 +243,9 @@ class E # 0 # E # class FormatVersion(IntEnum): +debug_init_types = False + + def codegen( obj, out, @@ -236,6 +261,7 @@ def codegen( module_map={}, global_names=[], ): + print("codegen obj", obj) global val # fix print_value mod = obj.__class__.__module__ # member = obj.__class__.__name__ # DatabaseHeader @@ -272,18 +298,53 @@ def codegen( print(f"{ind}def get_{root_name}(_io=None, check=True):", file=out) print(f"{ind}{ids}if not _io:", file=out) print(f"{ind}{ids}{ids}_io = kaitaistruct.KaitaiStream(io.BytesIO(bytearray(root_size)))", file=out) - print(f"{ind}{ids}{on} = {mod}.{member}(_io)", file=out) - # TODO remove. this works only for sqlite3.ksy - print(f"{ind}{ids}# try to fix root._write", file=out) - print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out) - print(f"{ind}{ids}{on}.pages__to_write = False", file=out) - # root.pages__to_write = True + # TODO also pass parameters to root.__init__ + """ + val_params = [] + if hasattr(val, "__init__"): + val_init_sig = inspect.signature(val.__init__) + # ... + """ + + # print(f"{ind}{ids}{on} = {mod}.{member}(_io=_io)", file=out) + on_parent_root = f"{on_parent}._root" if on_parent else "None" + print(f"{ind}{ids}{on} = {mod}.{member}(_io=_io, _parent={on_parent}, _root={on_parent_root})", file=out) + + # print(f"{ind}{ids}assert {on}._root == {on}", file=out) # debug + + if parse_page_by_page: + # TODO remove. this works only for sqlite3.ksy + print(f"{ind}{ids}# try to fix root._write", file=out) + print(f"{ind}{ids}# https://github.com/kaitai-io/kaitai_struct/issues/1245", file=out) + print(f"{ind}{ids}{on}.pages__to_write = False", file=out) + # root.pages__to_write = True # else: # print(f"{ind}{ids}# non-root init", file=out) # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) - for key in get_seq(obj): + # TODO? interleave "seq" and "instance" keys + # TODO rename to seq_key? 
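        # (note: "seq" keys are recovered by regex-scanning the source of
        #  _read, "instance" keys by scanning _fetch_instances; list
        #  values get expanded in place into "name[i]" pseudo-keys, so
        #  this loop walks a growable worklist rather than a fixed list)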
+ # for key in get_seq(obj): + key_stack = get_seq(obj) + get_instances(obj) + while key_stack: + key = key_stack.pop(0) # print(f"{ind}{ids}# key {key}", file=out) - val = getattr(obj, key) + print("key", key) # debug + val_is_list_item = False + if key.endswith("]"): + # val is a list item + val_is_list_item = True + m = re.fullmatch(r"(\w+)\[(\d+)\]", key) + val_arr_name, val_arr_idx = m.groups() + val_arr_idx = int(val_arr_idx) + val_arr = getattr(obj, val_arr_name) + val = val_arr[val_arr_idx] + else: + # FIXME get_seq also returns items where the "if" condition is false + # val = getattr(obj, key) + try: + val = getattr(obj, key) + except AttributeError: + continue """ print("key", repr(key)) print("val", repr(val), dir(val)) @@ -298,15 +359,30 @@ def codegen( # builtin types: int, bytes, ... if mod == "builtins": + if debug_init_types: + print(f"{ind}{ids}# builtin type {type(val).__name__}", file=out) if isinstance(val, int) and val > 10: print(f"{ind}{ids}{on}.{key} = {val!r} # {hex(val)}", file=out) continue if isinstance(val, bytes) and val == len(val) * b"\x00": # compress null bytes # TODO partial compression of bytestrings - print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out) + if len(val) == 0: + print(f"{ind}{ids}{on}.{key} = b''", file=out) + else: + print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out) + continue + if isinstance(val, list): + print(f"{ind}{ids}{on}.{key} = []", file=out) + new_keys = [] + for item_idx in range(len(val)): + new_keys.append(f"{key}[{item_idx}]") + # recursion via stack + key_stack = new_keys + key_stack + # TODO + # print(f"{ind}{ids}{on}.{key}.append({xxxxxxx})", file=out) continue - # bytes, ... + # bytes, str, ... print(f"{ind}{ids}{on}.{key} = {val!r}", file=out) continue @@ -327,6 +403,8 @@ def codegen( m = re.match(r"\s*class (\w+)\(([A-Z][A-Za-z0-9]*Enum)\):", lines[0].rstrip()) if m: enum_name, enum_type = m.groups() + if debug_init_types: + print(f"{ind}{ids}# enum type {enum_name}", file=out) enum_map = enum_map_map.get(enum_name) # read cache if not enum_map: enum_map = parse_enum_map(lines) @@ -346,19 +424,50 @@ def codegen( print(f"{ind}{ids}{on}.{key} = {mod}.{enum_qualname}.{enum_key} # {val_str}", file=out) continue - # TODO handle list types - # m = ... - # if m: - # ... - # continue - # user-defined types + if debug_init_types: + print(f"{ind}{ids}# user-defined type {member}", file=out) # https://doc.kaitai.io/serialization.html#_user_defined_types # print(f"{ind}{ids}{on}.{key} = root.{member}(root._io, {on}, {on}._root)", file=out) # short # print(f"{ind}{ids}{on}.{key} = {mod}.{root_cln}.{member}(root._io, {on}, {on}._root)", file=out) # long - print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long + # print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long + val_params = [] + if hasattr(val, "__init__"): + val_init_sig = inspect.signature(val.__init__) + if str(val_init_sig) != "(_io=None, _parent=None, _root=None)": + # print("val_init_sig", repr(val_init_sig)) + # val.__init__ has extra args + # example: page_number in "(page_number, _io=None, _parent=None, _root=None)" + for param_name in val_init_sig.parameters.keys(): + # print(f"param_name {param_name}") + if param_name in ("_io", "_parent", "_root"): + continue + # FIXME handle user-defined types via recursion + # example: + """ + def get_page_number(): + # ... 
+ pages.append(BtreePage(page_number=get_page_number(), _io=root._io, _parent=root, _root=root._root)) + """ + param_val = getattr(val, param_name) + val_params.append(f"{param_name}={param_val}") + val_params = "".join(map(lambda arg: arg + ", ", val_params)) + if val_is_list_item: + print(f"{ind}{ids}{on}.{val_arr_name}.append({mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root))", file=out) # long + else: + print(f"{ind}{ids}{on}.{key} = {mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root)", file=out) # long + def get_singular_name(plural_name): + # vals -> val + # val_list -> val + if plural_name.endswith("_list"): return plural_name[:-5] + if plural_name.endswith("_array"): return plural_name[:-6] + if plural_name.endswith("s"): return plural_name[:-1] + return plural_name # avoid shadowing global variables - local_key = get_local_key(key, global_names) + if val_is_list_item: + local_key = get_local_key(get_singular_name(val_arr_name), global_names) + else: + local_key = get_local_key(key, global_names) # print(f"{ind}{ids}if 1:", file=out) # no block scope # print(f"{ind}{ids}if {local_key} := {on}.{key}:", file=out) # no block scope # TypeError: 'int' object does not support the context manager protocol @@ -366,7 +475,7 @@ def codegen( # create block scope # this is required to avoid name collisions between scopes # https://stackoverflow.com/a/45210833/10440128 - print(f"{ind}{ids}def init_{key}({local_key}):", file=out) # "init_" prefix + print(f"{ind}{ids}def init_{local_key}({local_key}):", file=out) # "init_" prefix # print(f"{ind}{ids}def {key}_init({local_key}):", file=out) # "_init" suffix # recursion codegen( @@ -382,9 +491,36 @@ def codegen( module_map, global_names, ) - print(f"{ind}{ids}init_{key}({on}.{key})", file=out) # "init_" prefix + + if val_is_list_item: + print(f"{ind}{ids}init_{local_key}({on}.{val_arr_name}[{val_arr_idx}])", file=out) # "init_" prefix + else: + print(f"{ind}{ids}init_{local_key}({on}.{key})", file=out) # "init_" prefix + # print(f"{ind}{ids}{key}_init({local_key})", file=out) # "_init" suffix + # for instance_key in get_instances(obj): + if 0: + # print(f"{ind}{ids}# instance_key {instance_key}", file=out) + val = getattr(obj, instance_key) + """ + print("instance_key", repr(instance_key)) + print("val", repr(val), dir(val)) + print_value("val.__class__.__module__") + print_value("val.__class__.__qualname__") + """ + # obj.__class__.__module__ == 'builtins' + # TODO rename to "mod_name" + mod = val.__class__.__module__ + # TODO rename to "member_name" + member = val.__class__.__qualname__ + + print("obj", obj) + print("FIXME instance_key", instance_key, val, mod, member) + # FIXME instance_key page 0 builtins int + # FIXME instance_key page None builtins NoneType + raise 123 + # some user-defined types need this # example: AttributeError: 'VlqBase128Be' object has no attribute 'groups' # but this breaks other cases... 
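
A quick standalone check of the deque behavior the next patch relies on:
collections.deque.extendleft() prepends items one at a time, so it reverses
their order unless the caller reverses first. (The patch spells it
queue.deque, which works because the queue module imports deque from
collections.)

    from collections import deque

    d = deque(["a", "b"])
    d.extendleft(["x", "y"])  # appendleft per item, order flips
    assert list(d) == ["y", "x", "a", "b"]

    d = deque(["a", "b"])
    new_keys = ["x", "y"]
    new_keys.reverse()        # hence the reverse() before extendleft
    d.extendleft(new_keys)
    assert list(d) == ["x", "y", "a", "b"]
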
From f68dec68402a15b464f5da6b6e54b480deef8f1f Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Mon, 4 Aug 2025 09:19:43 +0200 Subject: [PATCH 08/10] use deque for key_stack --- serialize_py/kaitai_serialize_codegen.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index f9ae46d..300efb2 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -14,6 +14,7 @@ import re import os import sys +import queue import shlex import inspect import subprocess @@ -321,12 +322,9 @@ def codegen( # else: # print(f"{ind}{ids}# non-root init", file=out) # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) - # TODO? interleave "seq" and "instance" keys - # TODO rename to seq_key? - # for key in get_seq(obj): - key_stack = get_seq(obj) + get_instances(obj) + key_stack = queue.deque(get_seq(obj) + get_instances(obj)) while key_stack: - key = key_stack.pop(0) + key = key_stack.popleft() # print(f"{ind}{ids}# key {key}", file=out) print("key", key) # debug val_is_list_item = False @@ -378,7 +376,8 @@ def codegen( for item_idx in range(len(val)): new_keys.append(f"{key}[{item_idx}]") # recursion via stack - key_stack = new_keys + key_stack + new_keys.reverse() # extendleft adds values in reverse order + key_stack.extendleft(new_keys) # TODO # print(f"{ind}{ids}{on}.{key}.append({xxxxxxx})", file=out) continue From b6046a965a47156d5ae575f34bf7b876ee8db10e Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Mon, 4 Aug 2025 10:43:25 +0200 Subject: [PATCH 09/10] kaitaistruct_sqlite3.py: use early binding of i https://github.com/kaitai-io/kaitai_struct/issues/1246 --- serialize_py/kaitaistruct_sqlite3.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/serialize_py/kaitaistruct_sqlite3.py b/serialize_py/kaitaistruct_sqlite3.py index 60919bb..88b0075 100644 --- a/serialize_py/kaitaistruct_sqlite3.py +++ b/serialize_py/kaitaistruct_sqlite3.py @@ -2044,7 +2044,9 @@ def _write_pages(self): self._io.add_child_stream(_io__raw__m_pages) _pos2 = self._io.pos() self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) - def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + # NOTE early binding of i + # https://github.com/kaitai-io/kaitai_struct/issues/1246 + def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i): self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): raise kaitaistruct.ConsistencyError(u"raw(pages)", len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) @@ -2057,7 +2059,8 @@ def handler(parent, _io__raw__m_pages=_io__raw__m_pages): self._io.add_child_stream(_io__raw__m_pages) _pos2 = self._io.pos() self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) - def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + # NOTE early binding of i + def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i): self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): raise kaitaistruct.ConsistencyError(u"raw(pages)", 
len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) @@ -2070,7 +2073,8 @@ def handler(parent, _io__raw__m_pages=_io__raw__m_pages): self._io.add_child_stream(_io__raw__m_pages) _pos2 = self._io.pos() self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size))) - def handler(parent, _io__raw__m_pages=_io__raw__m_pages): + # NOTE early binding of i + def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i): self._raw__m_pages.append(_io__raw__m_pages.to_byte_array()) if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)): raise kaitaistruct.ConsistencyError(u"raw(pages)", len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]), ((self.header.page_size - 100) if (i == 0) else self.header.page_size)) From 66580c9350f0bafb016f4682327a720245680f5d Mon Sep 17 00:00:00 2001 From: Milan Hauth Date: Mon, 4 Aug 2025 17:43:04 +0200 Subject: [PATCH 10/10] fix positional instances --- serialize_py/codegen_result.py | 117 +++++++++++++- serialize_py/kaitai_serialize_codegen.py | 195 +++++++++++++++++++---- serialize_py/kaitaistruct_sqlite3.py | 140 +++++++++++----- serialize_py/kaitaistruct_sqlite3.py.sh | 6 + serialize_py/sqlite3.ksy | 43 +++-- 5 files changed, 405 insertions(+), 96 deletions(-) diff --git a/serialize_py/codegen_result.py b/serialize_py/codegen_result.py index ffd397d..564e398 100644 --- a/serialize_py/codegen_result.py +++ b/serialize_py/codegen_result.py @@ -1,6 +1,8 @@ import io import kaitaistruct import kaitaistruct_sqlite3 +import vlq_base128_be +import pyvlq root_size = 8192 @@ -50,8 +52,120 @@ def init_page(page): page.cell_pointers.append(kaitaistruct_sqlite3.Sqlite3.CellPointer(_io=root._io, _parent=page, _root=page._root)) def init_cell_pointer(cell_pointer): cell_pointer.ofs_content = 4044 # 0xfcc + cell_pointer.content = kaitaistruct_sqlite3.Sqlite3.TableLeafCell(_io=root._io, _parent=cell_pointer, _root=cell_pointer._root) + def init_content(content): + content.payload_size = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(50)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + content.payload_size._read() + content.row_id = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(1)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + content.row_id._read() + content.payload = kaitaistruct_sqlite3.Sqlite3.Record(_io=root._io, _parent=content, _root=content._root) + def init_payload(payload): + payload.header_size = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(6)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + payload.header_size._read() + payload._raw_header = b'\x17\x15\x15\x01I' + payload.header = kaitaistruct_sqlite3.Sqlite3.RecordHeader(_io=root._io, _parent=payload, _root=payload._root) + def init_header(header): + header.value_types = [] + header.value_types.append(kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=header, _root=header._root)) + def init_value_type(value_type): + value_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(23)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_type.raw_value._read() + init_value_type(header.value_types[0]) + header.value_types.append(kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=header, _root=header._root)) + def init_value_type(value_type): + value_type.raw_value = 
vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(21)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_type.raw_value._read() + init_value_type(header.value_types[1]) + header.value_types.append(kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=header, _root=header._root)) + def init_value_type(value_type): + value_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(21)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_type.raw_value._read() + init_value_type(header.value_types[2]) + header.value_types.append(kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=header, _root=header._root)) + def init_value_type(value_type): + value_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(1)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_type.raw_value._read() + init_value_type(header.value_types[3]) + header.value_types.append(kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=header, _root=header._root)) + def init_value_type(value_type): + value_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(73)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_type.raw_value._read() + init_value_type(header.value_types[4]) + init_header(payload.header) + payload.values = [] + def get_value_serial_type(): + value_serial_type = kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=payload, _root=payload._root) + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(23)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_serial_type.raw_value._read() + return value_serial_type + payload.values.append(kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root)) + def init_value(value): + value.value = kaitaistruct_sqlite3.Sqlite3.StringUtf8(len_value=5, _io=root._io, _parent=value, _root=value._root) + def init_value(value): + value.value = 'table' + init_value(value.value) + init_value(payload.values[0]) + def get_value_serial_type(): + value_serial_type = kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=payload, _root=payload._root) + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(21)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_serial_type.raw_value._read() + return value_serial_type + payload.values.append(kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root)) + def init_value(value): + value.value = kaitaistruct_sqlite3.Sqlite3.StringUtf8(len_value=4, _io=root._io, _parent=value, _root=value._root) + def init_value(value): + value.value = 'test' + init_value(value.value) + init_value(payload.values[1]) + def get_value_serial_type(): + value_serial_type = kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=payload, _root=payload._root) + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(21)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_serial_type.raw_value._read() + return value_serial_type + payload.values.append(kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root)) + def init_value(value): + value.value = kaitaistruct_sqlite3.Sqlite3.StringUtf8(len_value=4, 
_io=root._io, _parent=value, _root=value._root) + def init_value(value): + value.value = 'test' + init_value(value.value) + init_value(payload.values[2]) + def get_value_serial_type(): + value_serial_type = kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=payload, _root=payload._root) + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(1)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_serial_type.raw_value._read() + return value_serial_type + payload.values.append(kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root)) + def init_value(value): + value.value = 2 + init_value(payload.values[3]) + def get_value_serial_type(): + value_serial_type = kaitaistruct_sqlite3.Sqlite3.SerialType(_io=root._io, _parent=payload, _root=payload._root) + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode(73)) + # fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups' + value_serial_type.raw_value._read() + return value_serial_type + payload.values.append(kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root)) + def init_value(value): + value.value = kaitaistruct_sqlite3.Sqlite3.StringUtf8(len_value=30, _io=root._io, _parent=value, _root=value._root) + def init_value(value): + value.value = 'CREATE TABLE test (id INTEGER)' + init_value(value.value) + init_value(payload.values[4]) + init_payload(content.payload) + init_content(cell_pointer.content) init_cell_pointer(page.cell_pointers[0]) - page.cell_content_area = b'2\x01\x06\x17\x15\x15\x01Itabletesttest\x02CREATE TABLE test (id INTEGER)' page.reserved_space = None init_page(root.pages[0]) root.pages.append(kaitaistruct_sqlite3.Sqlite3.BtreePage(page_number=2, _io=root._io, _parent=root, _root=root._root)) @@ -62,7 +176,6 @@ def init_page(page): page.ofs_cell_content_area_raw = 4096 # 0x1000 page.num_frag_free_bytes = 0 page.cell_pointers = [] - page.cell_content_area = b'' page.reserved_space = None init_page(root.pages[1]) if check: diff --git a/serialize_py/kaitai_serialize_codegen.py b/serialize_py/kaitai_serialize_codegen.py index 300efb2..1edd5f9 100755 --- a/serialize_py/kaitai_serialize_codegen.py +++ b/serialize_py/kaitai_serialize_codegen.py @@ -10,6 +10,9 @@ # parse_page_by_page = True parse_page_by_page = False +debug_codegen_tree = False +# debug_codegen_tree = True + import io import re import os @@ -96,6 +99,12 @@ def f(k): keys = list(filter(f, keys)) return keys +def get_unique_list(seq): + # https://stackoverflow.com/questions/480214/how-do-i-remove-duplicates-from-a-list-while-preserving-order + seen = set() + seen_add = seen.add + return [x for x in seq if not (x in seen or seen_add(x))] + def get_seq(obj): # TODO upstream: this should be simpler if not hasattr(obj, "_read"): @@ -123,18 +132,38 @@ def get_seq(obj): continue # list type m = re.match(r"\s+self\.(\w+) = \[\]", line) + if m: + seq.append(m[1]) + continue + # string type + # self.value = (self._io.read_bytes(self.len_value)).decode("UTF-8") + m = re.match(r'\s+self\.(\w+) = \(self\._io\.read_bytes\(self\.(\w+)\)\)\.decode\("([^"]+)"\)', line) + if m: + seq.append(m[1]) + continue + # "repeat: eos" type + # self._raw_header = self._io.read_bytes((self.header_size.value - 1)) + # _io__raw_header = KaitaiStream(BytesIO(self._raw_header)) + # self.header = Sqlite3.RecordHeader(_io__raw_header, self, self._root) + # FIXME 
also capture init parameters + m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\((_io_\w+)(?:, self, self\._root)?\)", line) if m: seq.append(m[1]) continue # user-defined types # self.header = Sqlite3.DatabaseHeader(self._io, self, self._root) - m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) + # self.raw_value = vlq_base128_be.VlqBase128Be(self._io) + # m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io, self, self\._root\)", line) + # FIXME also capture init parameters for parentless objects + # - value_serial_type.raw_value = vlq_base128_be.VlqBase128Be(_io=root._io, _parent=value_serial_type, _root=value_serial_type._root) + # + value_serial_type.raw_value = vlq_base128_be.VlqBase128Be(_io=root._io) + m = re.match(r"\s+self\.(\w+) = (\w+)\.(\w+)\(self\._io(?:, self, self\._root)?\)", line) if m: # print("m", m.groups()) # key, mod, member = m.groups() seq.append(m[1]) continue - return seq + return get_unique_list(seq) def get_instances(obj): # TODO upstream: this should be simpler @@ -152,7 +181,7 @@ def get_instances(obj): if m: instances.append(m[1]) continue - return instances + return get_unique_list(instances) def parse_enum_map(lines): enum_map = dict() @@ -243,6 +272,21 @@ class E # 0 # E # class Sqlite3(ReadWriteKaitaiStruct): # class FormatVersion(IntEnum): +def get_singular_name(plural_name): + # vals -> val + # val_list -> val + if plural_name.endswith("_list"): return plural_name[:-5] + if plural_name.endswith("_array"): return plural_name[:-6] + if plural_name.endswith("s"): return plural_name[:-1] + return plural_name + +def is_atom(val): + if isinstance(val, int): return True + if isinstance(val, bytes): return True + if isinstance(val, str): return True + if isinstance(val, float): return True # ? + # list, dict?, user-defined type + return False debug_init_types = False @@ -262,16 +306,19 @@ def codegen( module_map={}, global_names=[], ): - print("codegen obj", obj) global val # fix print_value + ind = indent_level * indent_step + ids = indent_step mod = obj.__class__.__module__ # member = obj.__class__.__name__ # DatabaseHeader member = obj.__class__.__qualname__ # Sqlite3.DatabaseHeader - """ - print("obj.__class__.__module__", obj.__class__.__module__) - print("obj.__class__.__name__", obj.__class__.__name__) - print("obj.__class__.__qualname__", obj.__class__.__qualname__) - """ + if debug_codegen_tree: + # debug + print(f"{ind}{ids}# line 290", file=out) + print(f"{ind}{ids}# codegen obj {on!r} {obj!r}", obj.__class__.__module__, file=out) + print(f"{ind}{ids}# obj.__class__.__module__", obj.__class__.__module__, file=out) + print(f"{ind}{ids}# obj.__class__.__name__", obj.__class__.__name__, file=out) + print(f"{ind}{ids}# obj.__class__.__qualname__", obj.__class__.__qualname__, file=out) is_root = True if on_parent == None else False if is_root: root = obj @@ -285,13 +332,13 @@ def codegen( global_names.append(mod) # TODO add imports of dependencies. example: vlq_base128_be for sqlite3 # root_cln = root.__class__.__qualname__ - ind = indent_level * indent_step - ids = indent_step if is_root: print(f"{ind}import io", file=out) print(f"{ind}import kaitaistruct", file=out) print(f"{ind}import {mod}", file=out) # TODO add imports of dependencies. 
example: vlq_base128_be for sqlite3 + print(f"{ind}import vlq_base128_be", file=out) + print(f"{ind}import pyvlq", file=out) # print(f"{ind}# root init", file=out) print("", file=out) print(f"{ind}root_size = {root._io._size}", file=out) @@ -322,7 +369,9 @@ def codegen( # else: # print(f"{ind}{ids}# non-root init", file=out) # print(f"{ind}{ids}{on} = {mod}.{member}(_io, {on_parent}, {on_parent}._root)", file=out) - key_stack = queue.deque(get_seq(obj) + get_instances(obj)) + key_stack = queue.deque(get_unique_list(get_seq(obj) + get_instances(obj))) + if debug_codegen_tree: + print("key_stack", list(key_stack)) while key_stack: key = key_stack.popleft() # print(f"{ind}{ids}# key {key}", file=out) @@ -343,19 +392,26 @@ def codegen( val = getattr(obj, key) except AttributeError: continue - """ - print("key", repr(key)) - print("val", repr(val), dir(val)) - print_value("val.__class__.__module__") - print_value("val.__class__.__qualname__") - """ + if debug_codegen_tree: + print(f"{ind}{ids}# line 370: key_stack step", file=out) + print(f"{ind}{ids}# key_stack {list(key_stack)!r}", file=out) + print(f"{ind}{ids}# key {key!r}", file=out) + print(f"{ind}{ids}# val {val!r} {dir(val)}", file=out) + print(f"{ind}{ids}# val.__class__.__module__ {val.__class__.__module__}", file=out) + print(f"{ind}{ids}# val.__class__.__name__ {val.__class__.__name__}", file=out) + print(f"{ind}{ids}# val.__class__.__qualname__ {val.__class__.__qualname__}", file=out) + if val_is_list_item: + print(f"{ind}{ids}# val_is_list_item True", file=out) + print(f"{ind}{ids}# val_arr_name {val_arr_name}", file=out) + print(f"{ind}{ids}# val_arr_idx {val_arr_idx}", file=out) + print(f"{ind}{ids}# val_arr {val_arr}", file=out) # obj.__class__.__module__ == 'builtins' # TODO rename to "mod_name" mod = val.__class__.__module__ # TODO rename to "member_name" member = val.__class__.__qualname__ - # builtin types: int, bytes, ... + # builtin types: int, bytes, list, ... 
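            # A hedged sketch of the encoding the VlqBase128Be
            # special-case below depends on (pyvlq.encode): big-endian
            # base-128, continuation bit 0x80 on every byte except the
            # last -- inferred from vlq_base128_be.ksy, not from pyvlq's
            # documented API:
            #     def vlq_encode_be(n):
            #         out = [n & 0x7F]
            #         n >>= 7
            #         while n:
            #             out.append((n & 0x7F) | 0x80)
            #             n >>= 7
            #         return bytes(reversed(out))
            #     vlq_encode_be(50) == b"2"  # 0x32, as in the cell bytes above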
if mod == "builtins": if debug_init_types: print(f"{ind}{ids}# builtin type {type(val).__name__}", file=out) @@ -371,10 +427,14 @@ def codegen( print(f"{ind}{ids}{on}.{key} = {len(val)} * b'\\x00'", file=out) continue if isinstance(val, list): + if debug_codegen_tree: + print(f"{ind}{ids}# line 410: val is a list", file=out) print(f"{ind}{ids}{on}.{key} = []", file=out) new_keys = [] for item_idx in range(len(val)): new_keys.append(f"{key}[{item_idx}]") + if debug_codegen_tree: + print(f"{ind}{ids}# line 415: recursion via key_stack: new_keys {new_keys}", file=out) # recursion via stack new_keys.reverse() # extendleft adds values in reverse order key_stack.extendleft(new_keys) @@ -432,6 +492,19 @@ def codegen( # print(f"{ind}{ids}{on}.{key} = {mod}.{member}(root._io, {on}, {on}._root)", file=out) # long val_params = [] if hasattr(val, "__init__"): + if val.__class__.__name__ == "VlqBase128Be": + val_expr = f"vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode({val.value}))" + if debug_codegen_tree: + print(f"{ind}{ids}# line 480: val_expr", file=out) + if val_is_list_item: + print(f"{ind}{ids}{on}.{val_arr_name}.append({val_expr})", file=out) + print(f"{ind}{ids}# fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups'", file=out) + print(f"{ind}{ids}{on}.{val_arr_name}[-1]._read()", file=out) + else: + print(f"{ind}{ids}{on}.{key} = {val_expr}", file=out) + print(f"{ind}{ids}# fix: AttributeError: 'VlqBase128Be' object has no attribute 'groups'", file=out) + print(f"{ind}{ids}{on}.{key}._read()", file=out) + continue val_init_sig = inspect.signature(val.__init__) if str(val_init_sig) != "(_io=None, _parent=None, _root=None)": # print("val_init_sig", repr(val_init_sig)) @@ -449,19 +522,83 @@ def get_page_number(): pages.append(BtreePage(page_number=get_page_number(), _io=root._io, _parent=root, _root=root._root)) """ param_val = getattr(val, param_name) - val_params.append(f"{param_name}={param_val}") + param_mod = param_val.__class__.__module__ + param_member = param_val.__class__.__qualname__ + if debug_codegen_tree: + # debug + print(f"{ind}{ids}# line 484 param_val {param_val}", file=out) + print(f"{ind}{ids}# line 484 param_val.__class__.__name__ {param_val.__class__.__name__}", file=out) + if is_atom(param_val): + param_val_expr = param_val + # not reached + # elif param_val.__class__.__name__ == "VlqBase128Be": + # # TODO move up imports + # print(f"{ind}{ids}import vlq_base128_be, pyvlq", file=out) + # print(f"{ind}{ids}# line 489", file=out) + # param_val_expr = f"vlq_base128_be.VlqBase128Be.from_bytes(pyvlq.encode({param_val.value}))" + else: + # val_params.append(f"{param_name}={param_val}") + if val_is_list_item: + _on = get_singular_name(val_arr_name) + local_param_name = get_local_key(f"{_on}_{param_name}", global_names) + else: + local_param_name = get_local_key(f"{on}_{param_name}", global_names) + # FIXME refactor + param_val_params = "" # FIXME add params + _param_val_expr = f"{param_mod}.{param_member}({param_val_params}_io=root._io, _parent={on}, _root={on}._root)" + if debug_codegen_tree: + print(f"{ind}{ids}# line 510: recursion via call", file=out) + # print(f"{ind}{ids}# local_param_name {local_param_name}", file=out) # debug + print(f"{ind}{ids}def get_{local_param_name}():", file=out) + print(f"{ind}{ids}{ids}{local_param_name} = {_param_val_expr}", file=out) + # recursion via call + val_before_recursion = val + local_param_name_before_recursion = local_param_name + codegen( + param_val, + out, + local_param_name, # "value_serial_type" + on, # 
"payload" + root, + root_name, + indent_step, + (indent_level + 1), + enum_map_map, + module_map, + global_names, + ) + # assert val_before_recursion == val # AssertionError + # assert local_param_name_before_recursion == local_param_name + # fix: restore loop variables + # TODO more? + val = val_before_recursion + print(f"{ind}{ids}{ids}return {local_param_name}", file=out) + # raise 123 # debug + param_val_expr = f"get_{local_param_name}()" + val_params.append(f"{param_name}={param_val_expr}") val_params = "".join(map(lambda arg: arg + ", ", val_params)) + # FIXME handle parentless objects + if debug_codegen_tree: + print(f"{ind}{ids}# line 530", file=out) + print(f"{ind}{ids}# mod {mod!r}", file=out) + print(f"{ind}{ids}# member {member!r}", file=out) + print(f"{ind}{ids}# val_params {val_params!r}", file=out) + val_expr = f"{mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root)" + if val_expr.startswith("kaitaistruct_sqlite3.Sqlite3.Value"): + if debug_codegen_tree: + # debug + # kaitaistruct_sqlite3.Sqlite3.Value(serial_type=get_value_serial_type(), _io=root._io, _parent=payload, _root=payload._root) + print(f"{ind}{ids}# line 540", file=out) + print(f"{ind}{ids}# val {val}", file=out) + print(f"{ind}{ids}# val.__class__ {val.__class__}", file=out) + print(f"{ind}{ids}# val.__class__.__module__ {val.__class__.__module__}", file=out) + print(f"{ind}{ids}# val.__class__.__name__ {val.__class__.__name__}", file=out) + print(f"{ind}{ids}# val.__class__.__qualname__ {val.__class__.__qualname__}", file=out) + print(f"{ind}{ids}# val._read {val._read}", file=out) if val_is_list_item: - print(f"{ind}{ids}{on}.{val_arr_name}.append({mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root))", file=out) # long + print(f"{ind}{ids}{on}.{val_arr_name}.append({val_expr})", file=out) else: - print(f"{ind}{ids}{on}.{key} = {mod}.{member}({val_params}_io=root._io, _parent={on}, _root={on}._root)", file=out) # long - def get_singular_name(plural_name): - # vals -> val - # val_list -> val - if plural_name.endswith("_list"): return plural_name[:-5] - if plural_name.endswith("_array"): return plural_name[:-6] - if plural_name.endswith("s"): return plural_name[:-1] - return plural_name + print(f"{ind}{ids}{on}.{key} = {val_expr}", file=out) # avoid shadowing global variables if val_is_list_item: local_key = get_local_key(get_singular_name(val_arr_name), global_names) diff --git a/serialize_py/kaitaistruct_sqlite3.py b/serialize_py/kaitaistruct_sqlite3.py index 88b0075..217306d 100644 --- a/serialize_py/kaitaistruct_sqlite3.py +++ b/serialize_py/kaitaistruct_sqlite3.py @@ -247,8 +247,6 @@ def __init__(self, page_number, _io=None, _parent=None, _root=None): self._parent = _parent self._root = _root self.page_number = page_number - self._should_write_cell_content_area = False - self.cell_content_area__to_write = True self._should_write_reserved_space = False self.reserved_space__to_write = True @@ -281,7 +279,6 @@ def _fetch_instances(self): pass self.cell_pointers[i]._fetch_instances() - _ = self.cell_content_area if (self._root.header.page_reserved_space_size != 0): pass _ = self.reserved_space @@ -290,7 +287,6 @@ def _fetch_instances(self): def _write__seq(self, io=None): super(Sqlite3.BtreePage, self)._write__seq(io) - self._should_write_cell_content_area = self.cell_content_area__to_write self._should_write_reserved_space = self.reserved_space__to_write self._io.write_u1(int(self.page_type)) self._io.write_u2be(self.first_freeblock) @@ -337,40 +333,6 @@ def 
@@ -337,40 +333,6 @@ def ofs_cell_content_area(self):
         def _invalidate_ofs_cell_content_area(self):
             del self._m_ofs_cell_content_area
         @property
-        def cell_content_area(self):
-            """We parse the first page separate from the 100 byte database header,
-            so for the first page, we have to subtract 100 from the offset,
-            to make the offset relative to our "page".
-            """
-            if self._should_write_cell_content_area:
-                self._write_cell_content_area()
-            if hasattr(self, '_m_cell_content_area'):
-                return self._m_cell_content_area
-
-            _pos = self._io.pos()
-            self._io.seek(((self.ofs_cell_content_area - 100) if (self.page_number == 1) else self.ofs_cell_content_area))
-            self._m_cell_content_area = self._io.read_bytes((self._root.header.usable_size - self.ofs_cell_content_area))
-            self._io.seek(_pos)
-            return getattr(self, '_m_cell_content_area', None)
-
-        @cell_content_area.setter
-        def cell_content_area(self, v):
-            self._m_cell_content_area = v
-
-        def _write_cell_content_area(self):
-            self._should_write_cell_content_area = False
-            _pos = self._io.pos()
-            self._io.seek(((self.ofs_cell_content_area - 100) if (self.page_number == 1) else self.ofs_cell_content_area))
-            self._io.write_bytes(self.cell_content_area)
-            self._io.seek(_pos)
-
-
-        def _check_cell_content_area(self):
-            pass
-            if (len(self.cell_content_area) != (self._root.header.usable_size - self.ofs_cell_content_area)):
-                raise kaitaistruct.ConsistencyError(u"cell_content_area", len(self.cell_content_area), (self._root.header.usable_size - self.ofs_cell_content_area))
-
-        @property
         def reserved_space(self):
             if self._should_write_reserved_space:
                 self._write_reserved_space()
@@ -1596,6 +1558,8 @@ def __init__(self, _io=None, _parent=None, _root=None):
             self._io = _io
             self._parent = _parent
             self._root = _root
+            self._should_write_content = False
+            self.content__to_write = True
 
         def _read(self):
             self.ofs_content = self._io.read_u2be()
@@ -1603,16 +1567,112 @@ def _read(self):
 
         def _fetch_instances(self):
             pass
+            _ = self.content
+            _on = self._parent.page_type
+            if _on == Sqlite3.BtreePageType.table_leaf_page:
+                pass
+                self.content._fetch_instances()
+            elif _on == Sqlite3.BtreePageType.table_interior_page:
+                pass
+                self.content._fetch_instances()
+            elif _on == Sqlite3.BtreePageType.index_leaf_page:
+                pass
+                self.content._fetch_instances()
+            elif _on == Sqlite3.BtreePageType.index_interior_page:
+                pass
+                self.content._fetch_instances()
 
         def _write__seq(self, io=None):
             super(Sqlite3.CellPointer, self)._write__seq(io)
+            self._should_write_content = self.content__to_write
            self._io.write_u2be(self.ofs_content)
 
        def _check(self):
            pass
 
+        @property
+        def content(self):
+            if self._should_write_content:
+                self._write_content()
+            if hasattr(self, '_m_content'):
+                return self._m_content
+
+            _pos = self._io.pos()
+            self._io.seek((((-100 if (self._parent.page_number == 1) else 0) + ((self._parent.page_number - 1) * self._root.header.page_size)) + self.ofs_content))
+            _on = self._parent.page_type
+            if _on == Sqlite3.BtreePageType.table_leaf_page:
+                pass
+                self._m_content = Sqlite3.TableLeafCell(self._io, self, self._root)
+                self._m_content._read()
+            elif _on == Sqlite3.BtreePageType.table_interior_page:
+                pass
+                self._m_content = Sqlite3.TableInteriorCell(self._io, self, self._root)
+                self._m_content._read()
+            elif _on == Sqlite3.BtreePageType.index_leaf_page:
+                pass
+                self._m_content = Sqlite3.IndexLeafCell(self._io, self, self._root)
+                self._m_content._read()
+            elif _on == Sqlite3.BtreePageType.index_interior_page:
+                pass
+                self._m_content = Sqlite3.IndexInteriorCell(self._io, self, self._root)
+                self._m_content._read()
+            self._io.seek(_pos)
+            return getattr(self, '_m_content', None)
+
+        @content.setter
+        def content(self, v):
+            self._m_content = v
+
+        def _write_content(self):
+            self._should_write_content = False
+            _pos = self._io.pos()
+            self._io.seek((((-100 if (self._parent.page_number == 1) else 0) + ((self._parent.page_number - 1) * self._root.header.page_size)) + self.ofs_content))
+            _on = self._parent.page_type
+            if _on == Sqlite3.BtreePageType.table_leaf_page:
+                pass
+                self.content._write__seq(self._io)
+            elif _on == Sqlite3.BtreePageType.table_interior_page:
+                pass
+                self.content._write__seq(self._io)
+            elif _on == Sqlite3.BtreePageType.index_leaf_page:
+                pass
+                self.content._write__seq(self._io)
+            elif _on == Sqlite3.BtreePageType.index_interior_page:
+                pass
+                self.content._write__seq(self._io)
+            self._io.seek(_pos)
+
+
+        def _check_content(self):
+            pass
+            _on = self._parent.page_type
+            if _on == Sqlite3.BtreePageType.table_leaf_page:
+                pass
+                if self.content._root != self._root:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._root, self._root)
+                if self.content._parent != self:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._parent, self)
+            elif _on == Sqlite3.BtreePageType.table_interior_page:
+                pass
+                if self.content._root != self._root:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._root, self._root)
+                if self.content._parent != self:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._parent, self)
+            elif _on == Sqlite3.BtreePageType.index_leaf_page:
+                pass
+                if self.content._root != self._root:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._root, self._root)
+                if self.content._parent != self:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._parent, self)
+            elif _on == Sqlite3.BtreePageType.index_interior_page:
+                pass
+                if self.content._root != self._root:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._root, self._root)
+                if self.content._parent != self:
+                    raise kaitaistruct.ConsistencyError(u"content", self.content._parent, self)
+
     class Value(ReadWriteKaitaiStruct):
         def __init__(self, serial_type, _io=None, _parent=None, _root=None):
@@ -2044,8 +2104,6 @@ def _write_pages(self):
             self._io.add_child_stream(_io__raw__m_pages)
             _pos2 = self._io.pos()
             self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size)))
-            # NOTE early binding of i
-            # https://github.com/kaitai-io/kaitai_struct/issues/1246
             def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):
                 self._raw__m_pages.append(_io__raw__m_pages.to_byte_array())
                 if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)):
@@ -2059,7 +2117,6 @@ def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):
             self._io.add_child_stream(_io__raw__m_pages)
             _pos2 = self._io.pos()
             self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size)))
-            # NOTE early binding of i
             def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):
                 self._raw__m_pages.append(_io__raw__m_pages.to_byte_array())
                 if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)):
@@ -2073,7 +2130,6 @@ def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):
             self._io.add_child_stream(_io__raw__m_pages)
             _pos2 = self._io.pos()
             self._io.seek(self._io.pos() + (((self.header.page_size - 100) if (i == 0) else self.header.page_size)))
-            # NOTE early binding of i
             def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):
                 self._raw__m_pages.append(_io__raw__m_pages.to_byte_array())
                 if (len(self._raw__m_pages[(len(self._raw__m_pages) - 1)]) != ((self.header.page_size - 100) if (i == 0) else self.header.page_size)):
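The i=i default argument on the three handler definitions above is what the
sed command in kaitaistruct_sqlite3.py.sh (next file) patches in: Python
closures bind the loop variable late, so without it every handler would see
the final value of i. A standalone demonstration, plain Python, not taken
from this patch:

    # late binding: all three closures share the same i
    handlers = [lambda: i for i in range(3)]
    print([h() for h in handlers])  # [2, 2, 2]

    # early binding via a default argument: each closure keeps its own i
    handlers = [lambda i=i: i for i in range(3)]
    print([h() for h in handlers])  # [0, 1, 2]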
diff --git a/serialize_py/kaitaistruct_sqlite3.py.sh b/serialize_py/kaitaistruct_sqlite3.py.sh
index 220f331..dd1ad60 100755
--- a/serialize_py/kaitaistruct_sqlite3.py.sh
+++ b/serialize_py/kaitaistruct_sqlite3.py.sh
@@ -26,3 +26,9 @@ fi
 kaitai-struct-compiler --read-write --no-auto-read --target python --import-path kaitai_struct_formats/ kaitai_struct_formats/database/sqlite3.ksy
 
 mv sqlite3.py kaitaistruct_sqlite3.py
+
+# https://github.com/kaitai-io/kaitai_struct/issues/1246
+# fix: late binding of the loop variable i breaks the write_back_handler closures
+s='s/def handler(parent, _io__raw__m_pages=_io__raw__m_pages):'
+s+='/def handler(parent, _io__raw__m_pages=_io__raw__m_pages, i=i):/'
+sed -i "$s" kaitaistruct_sqlite3.py
diff --git a/serialize_py/sqlite3.ksy b/serialize_py/sqlite3.ksy
index b83a0e3..82d15ed 100644
--- a/serialize_py/sqlite3.ksy
+++ b/serialize_py/sqlite3.ksy
@@ -388,14 +388,14 @@ types:
     instances:
       ofs_cell_content_area:
         value: 'ofs_cell_content_area_raw == 0 ? 65536 : ofs_cell_content_area_raw'
-      cell_content_area:
-        # pos: ofs_cell_content_area
-        pos: 'page_number == 1 ? (ofs_cell_content_area - 100) : ofs_cell_content_area'
-        size: _root.header.usable_size - ofs_cell_content_area
-        doc: |
-          We parse the first page separate from the 100 byte database header,
-          so for the first page, we have to subtract 100 from the offset,
-          to make the offset relative to our "page".
+      # cell_content_area:
+      #   # pos: ofs_cell_content_area
+      #   pos: 'page_number == 1 ? (ofs_cell_content_area - 100) : ofs_cell_content_area'
+      #   size: _root.header.usable_size - ofs_cell_content_area
+      #   doc: |
+      #     We parse the first page separate from the 100 byte database header,
+      #     so for the first page, we have to subtract 100 from the offset,
+      #     to make the offset relative to our "page".
       reserved_space:
         pos: _root.header.page_size - _root.header.page_reserved_space_size
         size-eos: true
@@ -404,21 +404,18 @@ types:
     seq:
       - id: ofs_content
         type: u2
-# FIXME this breaks serialization:
-#     _io__raw_header = KaitaiStream(BytesIO(bytearray((self.header_size.value - 1))))
-#                                                      ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
-# ValueError: negative count
-#    instances:
-#      content:
-#        # ofs_content is relative to page
-#        pos: ((_parent.page_number - 1) * _root.header.page_size) + ofs_content
-#        type:
-#          switch-on: _parent.page_type
-#          cases:
-#            btree_page_type::table_leaf_page: table_leaf_cell
-#            btree_page_type::table_interior_page: table_interior_cell
-#            btree_page_type::index_leaf_page: index_leaf_cell
-#            btree_page_type::index_interior_page: index_interior_cell
+    instances:
+      content:
+        # ofs_content is relative to the start of the page
+        # pos: ((_parent.page_number - 1) * _root.header.page_size) + ofs_content
+        pos: '(_parent.page_number == 1 ? -100 : 0) + ((_parent.page_number - 1) * _root.header.page_size) + ofs_content'
+        type:
+          switch-on: _parent.page_type
+          cases:
+            btree_page_type::table_leaf_page: table_leaf_cell
+            btree_page_type::table_interior_page: table_interior_cell
+            btree_page_type::index_leaf_page: index_leaf_cell
+            btree_page_type::index_interior_page: index_interior_cell
     table_leaf_cell:
      doc-ref: 'https://www.sqlite.org/fileformat2.html#b_tree_pages'
      seq:
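The pos expression of the re-enabled content instance matches the seek()
calls in the generated content property and _write_content above: page 1 is
parsed after the 100-byte database header, so its stream positions are
shifted by -100 relative to file offsets. A standalone sketch of the
arithmetic with hypothetical example values; the name content_stream_pos is
illustrative only:

    def content_stream_pos(page_number: int, page_size: int, ofs_content: int) -> int:
        # ofs_content is an offset within the page; page 1 loses 100
        # bytes to the database header at the start of the stream
        shift = -100 if page_number == 1 else 0
        return shift + (page_number - 1) * page_size + ofs_content

    assert content_stream_pos(1, 4096, 4000) == 3900  # page 1: header subtracted
    assert content_stream_pos(2, 4096, 4000) == 8096  # later pages: plain page offset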