From 578f0393fe380ca3e45d4851fdf4d581d05ff75c Mon Sep 17 00:00:00 2001 From: Jean Do Date: Thu, 28 Aug 2025 10:46:57 -0400 Subject: [PATCH 1/4] initial commit of re-prioritizing plain data above interpretting as KEF. --- src/krux/pages/home_pages/addresses.py | 34 ++++++--- .../pages/home_pages/wallet_descriptor.py | 73 ++++++++++++------- tests/pages/home_pages/test_addresses.py | 73 +++++++++++++++++++ .../home_pages/test_wallet_descriptor.py | 56 ++++++++++++++ 4 files changed, 199 insertions(+), 37 deletions(-) diff --git a/src/krux/pages/home_pages/addresses.py b/src/krux/pages/home_pages/addresses.py index ceeb2fea9..b024dca6c 100644 --- a/src/krux/pages/home_pages/addresses.py +++ b/src/krux/pages/home_pages/addresses.py @@ -283,7 +283,6 @@ def _scan_highlight_addr(self, result_message): def scan_address(self, addr_type=0): """Handler for the 'receive' or 'change' menu item""" from ..qr_capture import QRCodeCapture - from ..encryption_ui import decrypt_kef qr_capture = QRCodeCapture(self.ctx) data, qr_format = qr_capture.qr_capture_loop() @@ -291,21 +290,34 @@ def scan_address(self, addr_type=0): self.flash_error(t("Failed to load")) return MENU_CONTINUE - try: - data = decrypt_kef(self.ctx, data).decode() - except KeyError: - self.flash_error(t("Failed to decrypt")) - return MENU_CONTINUE - except ValueError: - # ValueError=not KEF or declined to decrypt - pass + from ...wallet import parse_address addr = None try: - from ...wallet import parse_address - addr = parse_address(data) except: + pass + + if addr is None: + from ..encryption_ui import decrypt_kef + + try: + data = decrypt_kef(self.ctx, data).decode() + except ValueError: + # ValueError=not KEF or declined to decrypt + data = None + except: + # KeyError or other exception during decryption + self.flash_error(t("Failed to decrypt")) + return MENU_CONTINUE + + if data is not None: + try: + addr = parse_address(data) + except: + pass + + if addr is None: self.flash_error(t("Invalid address")) return 
MENU_CONTINUE diff --git a/src/krux/pages/home_pages/wallet_descriptor.py b/src/krux/pages/home_pages/wallet_descriptor.py index 13895fea3..d7abf3ae8 100644 --- a/src/krux/pages/home_pages/wallet_descriptor.py +++ b/src/krux/pages/home_pages/wallet_descriptor.py @@ -125,6 +125,26 @@ def wallet(self): def _load_wallet(self): """Load a wallet output descriptor from the camera or SD card""" + def load_wallet_or_exception(wallet_data, qr_format): + from ...wallet import AssumptionWarning + + wallet_load_exception = None + + try: + wallet.load(wallet_data, qr_format) + except AssumptionWarning as e: + self.ctx.display.clear() + self.ctx.display.draw_centered_text(e.args[0], theme.error_color) + if self.prompt(t("Accept assumption?"), BOTTOM_PROMPT_LINE): + try: + wallet.load(wallet_data, qr_format, allow_assumption=e.args[1]) + except Exception as e_again: + wallet_load_exception = e_again + except Exception as e: + wallet_load_exception = e + + return wallet_load_exception + persisted = False load_method = self.load_method() if load_method == LOAD_FROM_CAMERA: @@ -152,41 +172,42 @@ def _load_wallet(self): else: # Cancel return MENU_CONTINUE - self.ctx.display.clear() - self.ctx.display.draw_centered_text(t("Processing…")) if wallet_data is None: # Camera or SD card loading failed! 
self.flash_error(t("Failed to load")) return MENU_CONTINUE - from ..encryption_ui import decrypt_kef - - try: - wallet_data = decrypt_kef(self.ctx, wallet_data).decode() - except KeyError: - self.flash_error(t("Failed to decrypt")) - return MENU_CONTINUE - except ValueError: - # ValueError=not KEF or declined to decrypt - pass - - from ...wallet import Wallet, AssumptionWarning + from ...wallet import Wallet + self.ctx.display.clear() + self.ctx.display.draw_centered_text(t("Processing…")) wallet = Wallet(self.ctx.wallet.key) wallet.persisted = persisted - wallet_load_exception = None - try: - wallet.load(wallet_data, qr_format) - except AssumptionWarning as e: - self.ctx.display.clear() - self.ctx.display.draw_centered_text(e.args[0], theme.error_color) - if self.prompt(t("Accept assumption?"), BOTTOM_PROMPT_LINE): + + wallet_load_exception = load_wallet_or_exception(wallet_data, qr_format) + + if not wallet.is_loaded(): + from ..encryption_ui import decrypt_kef + + if isinstance(wallet_data, bytes): try: - wallet.load(wallet_data, qr_format, allow_assumption=e.args[1]) - except Exception as e_again: - wallet_load_exception = e_again - except Exception as e: - wallet_load_exception = e + wallet_data = wallet_data.decode() + except UnicodeDecodeError: + pass + + try: + wallet_data = decrypt_kef(self.ctx, wallet_data) + except ValueError: + # ValueError=not KEF or declined to decrypt + pass + except: + # KeyError or other exception during decryption + self.flash_error(t("Failed to decrypt")) + return MENU_CONTINUE + + if wallet_data: + wallet_load_exception = load_wallet_or_exception(wallet_data, qr_format) + if wallet_load_exception: self.ctx.display.clear() self.ctx.display.draw_centered_text( diff --git a/tests/pages/home_pages/test_addresses.py b/tests/pages/home_pages/test_addresses.py index 54e2d6309..db50151fe 100644 --- a/tests/pages/home_pages/test_addresses.py +++ b/tests/pages/home_pages/test_addresses.py @@ -713,6 +713,79 @@ def 
test_scan_address_fails_on_decrypt_kef_key_error(mocker, m5stickv): "Failed to decrypt", 248, 2000, highlight_prefix="" ) + # will be ValueError, if user fails to load key + btn_seq = [ + BUTTON_ENTER, # confirm decrypt + BUTTON_PAGE_PREV, # back to "Go" + BUTTON_PAGE_PREV, # back to Esc + BUTTON_ENTER, # go Esc + BUTTON_PAGE_PREV, # deny key '' + BUTTON_ENTER, + ] + ctx = create_ctx(mocker, btn_seq) + addresses_ui = Addresses(ctx) + assert addresses_ui.scan_address() == MENU_CONTINUE + # assert ctx.input.wait_for_button.call_count == len(btn_seq) + print(ctx.display.method_calls) + ctx.display.flash_text.assert_called_with( + "Invalid address", 248, 2000, highlight_prefix="" + ) + + +def test_scan_kef_address(mocker, m5stickv, tdata): + from krux.input import BUTTON_ENTER, BUTTON_PAGE, BUTTON_PAGE_PREV + from krux.pages.home_pages.addresses import Addresses + from krux.wallet import Wallet + from krux.pages.qr_capture import QRCodeCapture + from krux.pages import MENU_CONTINUE + + # valid address encrypted w/ key="a" to test kef address + mocker.patch.object( + QRCodeCapture, + "qr_capture_loop", + new=lambda self: ( + "B2FkZHJlc3MFAAABARN21DbewFuk26WhryWxyfA6cI8i0ynYiahNPqsE7xq4YT3GaiG02/uDyY/8OhRMwW0l", + 0, + ), + ) + btn_seq = [ + BUTTON_ENTER, # confirm decrypt + BUTTON_ENTER, # type key + BUTTON_ENTER, # enter "a" + BUTTON_PAGE_PREV, # back to Go + BUTTON_ENTER, # go Go + BUTTON_ENTER, # confirm key "a" + BUTTON_ENTER, # dismiss addr qr + BUTTON_ENTER, # confirm to check address + BUTTON_ENTER, # dismiss results + ] + wallet = Wallet(tdata.SINGLESIG_12_WORD_KEY) + ctx = create_ctx(mocker, btn_seq, wallet, None) + addresses_ui = Addresses(ctx) + assert addresses_ui.scan_address() == MENU_CONTINUE + assert ctx.input.wait_for_button.call_count == len(btn_seq) + ctx.display.to_lines.assert_called_with( + "0.\n\nbc1q rhjq rz2d 9tdy m3p2 r9m2 vwzn 2sn2 yl6k 5m35 7y\n\nis a valid address!" 
+ ) + + # nonsensical b"InvalidAddress" plaintext encrypted w/ key="a" to test kef non-address + mocker.patch.object( + QRCodeCapture, + "qr_capture_loop", + new=lambda self: ( + "B2ludmFsaWQFAAABLCM5eVz0ZezVL+rKbZhK26Kbog==", + 0, + ), + ) + btn_seq = btn_seq[:6] + ctx = create_ctx(mocker, btn_seq) + addresses_ui = Addresses(ctx) + assert addresses_ui.scan_address() == MENU_CONTINUE + assert ctx.input.wait_for_button.call_count == len(btn_seq) + ctx.display.flash_text.assert_called_with( + "Invalid address", 248, 2000, highlight_prefix="" + ) + def test_list_receive_addresses(mocker, m5stickv, tdata): from krux.format import format_address diff --git a/tests/pages/home_pages/test_wallet_descriptor.py b/tests/pages/home_pages/test_wallet_descriptor.py index a96aab294..8dd4292f9 100644 --- a/tests/pages/home_pages/test_wallet_descriptor.py +++ b/tests/pages/home_pages/test_wallet_descriptor.py @@ -225,6 +225,62 @@ def test_wallet_load_fails_on_decrypt_kef_key_error(mocker, m5stickv, tdata): ) +def test_load_kef_desc(mocker, m5stickv): + from krux.input import BUTTON_ENTER, BUTTON_PAGE, BUTTON_PAGE_PREV + from krux.pages.home_pages.wallet_descriptor import WalletDescriptor + from krux.wallet import Wallet + from krux.pages.qr_capture import QRCodeCapture + from krux.pages import MENU_CONTINUE + + # a single-sig kef descriptor encrypted with key 'a' + mocker.patch.object( + QRCodeCapture, + "qr_capture_loop", + new=lambda self: ( + "D3NpbmdsZS1zaWctZGVzYwUAAAEFLL1FjNrDp7xVh8cYVlRJHLS0n3fYWMQpyXYwHenPlZUkHQZrcsRbYrext1g7E6OzRt8g+D5kpjyLB6vBCmM/LCuJXpjYg8MO+fRkXUPqEBKteenYLDKFZbBumooJxfkHBoLXGSzNkBVbRjoIu/6uiVvRRCjB8cXexekIG3aZruuymy/0bGrLqRYX25Ns83lgegv4ygZAevWgBqFiAYPON6ld", + 0, + ), + ) + btn_seq = [ + BUTTON_ENTER, # confirm load + BUTTON_ENTER, # go load from camera + BUTTON_ENTER, # confirm decrypt + BUTTON_ENTER, # type key + BUTTON_ENTER, # enter "a" + BUTTON_PAGE_PREV, # back to Go + BUTTON_ENTER, # go Go + BUTTON_ENTER, # confirm key "a" + BUTTON_ENTER, # 
confirm "Load?"" + ] + wallet = Wallet(None) + ctx = create_ctx(mocker, btn_seq, wallet, None) + walletdescriptor_ui = WalletDescriptor(ctx) + assert walletdescriptor_ui.wallet() == MENU_CONTINUE + assert ctx.input.wait_for_button.call_count == len(btn_seq) + ctx.display.flash_text.assert_called_with( + "Wallet output descriptor loaded!", 65535, 2000, highlight_prefix="" + ) + + # nonsensical "Not a descriptor" plaintext encrypted w/ key="a" to test kef non-descriptor + mocker.patch.object( + QRCodeCapture, + "qr_capture_loop", + new=lambda self: ( + "B2ludmFsaWQFAAAB3zXZLXysZV8mHFzb9CM3z9wa7Q==", + 0, + ), + ) + wallet = Wallet(None) + ctx = create_ctx(mocker, btn_seq, wallet, None) + walletdescriptor_ui = WalletDescriptor(ctx) + assert walletdescriptor_ui.wallet() == MENU_CONTINUE + assert ctx.input.wait_for_button.call_count == len(btn_seq) + ctx.display.draw_centered_text.assert_called_with( + "Invalid wallet:\nValueError(\"Character ' ' is not a valid base58 character\")", + 248, + ) + + def test_load_desc_without_change(mocker, m5stickv, tdata): import krux From bc4b476f2931e88d789d6a6f447e5d9642dfe23c Mon Sep 17 00:00:00 2001 From: Jean Do Date: Tue, 2 Sep 2025 07:34:00 -0400 Subject: [PATCH 2/4] initial checkin of false-KEF probabilistic mitigation during unwrap() --- src/krux/kef.py | 13 +++++++++++-- tests/test_kef.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/krux/kef.py b/src/krux/kef.py index 42aab1a73..f624e0adb 100644 --- a/src/krux/kef.py +++ b/src/krux/kef.py @@ -500,8 +500,17 @@ def unwrap(kef_bytes): if VERSIONS[version].get("pkcs_pad", False) in (True, False): if (len(payload) - extra) % 16 != 0: raise ValueError("Ciphertext is not aligned") - if (len(payload) - extra) // 16 < 1: - raise ValueError("Ciphertext is too short") + if len(payload) < extra + 1: + raise ValueError("Ciphertext is too short") + + # TODO: consider better probabilistic strategies to rule-out KEF identification + # cipher payload should appear 
random + if len(payload) > 32: + upper128 = len([x for x in payload if x > 127]) + if upper128 == 0: + raise ValueError("Payload is ascii") + if upper128 < len(payload) * 0.001: + raise ValueError("Distribution of payload not uniform") return (id_, version, iterations, payload) diff --git a/tests/test_kef.py b/tests/test_kef.py index ba850ee4b..6e39ee420 100644 --- a/tests/test_kef.py +++ b/tests/test_kef.py @@ -935,7 +935,7 @@ def i2b(an_int): # mock values so that iterations will be at bytes[6:9] id_ = b"test" version = 0 - ciphertext = b"\x00" * 32 + ciphertext = bytes([x*8 for x in range(32)]) # if (iterations % 10000 == 0) they are serialized divided by 10000 for iterations in (ten_k, ten_k * 10, ten_k * 50, ten_k * ten_k): From 792c1356f1a821f89df2faf58983a45daacf1fa4 Mon Sep 17 00:00:00 2001 From: Jean Do Date: Wed, 3 Sep 2025 09:58:13 -0400 Subject: [PATCH 3/4] initial commit of kef-specifications section on false-kef identification --- .../encryption/kef-specifications.en.md | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/docs/getting-started/features/encryption/kef-specifications.en.md b/docs/getting-started/features/encryption/kef-specifications.en.md index d12443b86..122094bb8 100644 --- a/docs/getting-started/features/encryption/kef-specifications.en.md +++ b/docs/getting-started/features/encryption/kef-specifications.en.md @@ -86,7 +86,59 @@ It is expected that any implementation can decrypt a KEF envelope that was creat * **On truncated Authentication** At first glance it may be concerning that `auth` bytes for many versions have been truncated and are trivially "weak". Note that KEF's use-case for authentication is to validate that the user has correctly entered their decryption `key`. In the worse case, "false-authenticated" success will occur at a rate of 1:16M (or 1:4B for others) if using an incorrect decryption `key`; similar if an attacker has modified the KEF envelope. 
In these "false-authenticated" success cases, data will result from decryption, but that data will NOT be the original secret or plaintext; it will be of no value. -## Common Structure of a KEF Envelope + +### On Identifying a KEF Envelope + + As noted previously, only the owner of a KEF envelope, having the correct decryption key, can be certain that any string of bytes is indeed a KEF envelope. Because "auth" bytes are truncated, even a KEF implementation that successfully parses and decrypts an envelope without errors may not know if the plaintext was the original secret or "useless" bytes which resulted in validated decryption under another key for the same truncated `auth` bytes. + + This trait, whether deemed a desirable "feature" or an undesirable "bug" should be carefully considered by implementations that deal with KEF envelopes. In particular, deciding "when" to offer a "Decrypt?" user-interface may be troublesome for larger byte-strings that can be confused as KEF envelopes. There are three obvious strategies to approach any datum in the context that it might be a KEF envelope: + + * **Explicitely as KEF**: where the user has been offered to open a KEF envelope and then the data is loaded -- the application can parse and decrypt assuming that the data is a KEF envelope and would fail if something goes wrong in parsing or decryption. + + * **Fall-back to KEF**: where the user has been offered to load data in a particular context -- the application can treat the data within context and if an error occurs, then fallback to checking if the data might be KEF, offer to "Decrypt?" and then treat the resulting plaintext in the original context. + + * **KEF in Priority**: where the user has been offered to load data in a particular context -- the application can try to parse as a KEF envelope, offering to "Decrypt?" and treating the resulting plaintext in the original context, or if parsing fails (not KEF), treating the original data in context. 
+ +NOTE: in the latter two strategies, each application may consider the context of data which is about to be loaded. If an application is expecting ascii or unicode data, it may be appropriate to use either one since ruling-out KEF could quickly be done via a successful decoding step. When the application is expecting to load binary data, then it is most likely a better strategy to use **Fall-back to KEF** whenever parsing data in context would fail early and efficiently, so that the loosely defined KEF envelope is parsed after plaintext parsing fails. + +In all above cases, because KEF is loosely defined, random byte-strings and even ascii or unicode byte-strings, may be mistaken as KEF envelopes. Probabilities for false-identification of a KEF evelope, and mitigation thereof will be discussed below. + +Because the first byte of a KEF envelope is the `len_id` byte -- defining the length of the user-defined ID (also the PBKDF2 salt), any byte value between 0 and 252 is valid(253, 254, and 255 are reserved for future use). There is a 252:256 probability, 98.44% that any byte-string will require further processing for KEF identification by jumping `len_id` +1 bytes to look for a valid `version` byte. + +Because there are currently 12 KEF versions, certainly more in the future, there is currently a 12:256 probability, or 4.69% chance that this byte-string will require further processing -- by parsing the rest of the envelope. + +Because iterations are the next 3 bytes and there is no explicit maximum limitation on the iterations value (besides what can fit in 3 bytes and what is reasonably useful on any particular device -- likely in the 100s of thousands) any values between 1 and 16M would be a valid KEF iterations value. An implementation, knowing that it runs on devices that can reasonably stretch a key no-more than 600K PBKDF2 iterations may decide that a byte-string with iterations values above that limit is NOT a KEF envelope worth further processing. 
Still, because KEF envelopes may be created or decrypted on various devices capable of higher PBKDF2 iterations, it is not recommended that an iterations maximum is used for ruling-out a KEF envelope (besides the invalid 0 iterations value); 100% of non-zero KEF iterations values are valid. + +Less than 5% percent of most byte-strings will require further processing to rule-out that they are KEF envelopes, and some that are not may still be identified as plausibly KEF, requiring to prompt the user to "Decrypt?". For block modes (ECB and CBC) the length of the ciphertext (apart from IV and `auth`) may be checked for alignment to 16 bytes, but for stream modes (GCM and CTR) only a minimum length may be checked (IV/Nonce + `auth` + at least 1 byte of ciphertext). Further Strategies for mitigating false-identification as a KEF envelope follow: + +Note in cases where processing has continued this far, that what remains of the remaining byte-string is the cipher payload. This field contains: the optional random IV or nonce, the AES ciphertext, and truncated auth bytes (either sha256 hash digest or internal MAC digest for mode GCM). These bytes should resemble a pseudo-random uniform distribution. It should be rare that they are ASCII or UTF-8 decodable; if so, vanishingly KEF-plausible as the size of the payload increases. + +Below, we'll consider decoding probabilities for the smallest of KEF payloads (for GCM and CTR) which may be no smaller than 17 bytes (12 bytes of random IV/nonce + 1 byte of ciphertext + 4 bytes of truncated auth), as well as larger payloads up to 32 bytes. + +ASCII and UTF-8 decodable for 1-Billion random samples + +| len_cpl | Prob. ASCII | Prob. 
UTF-8 | +|---------|---------------|---------------| +| 17 | 0.00075990% | 0.00532110% | +| 18 | 0.00037160% | 0.00300620% | +| 19 | 0.00018340% | 0.00168700% | +| 20 | 0.00008920% | 0.00094590% | +| 21 | 0.00004550% | 0.00053940% | +| 22 | 0.00002030% | 0.00029950% | +| 23 | 0.00001010% | 0.00017110% | +| 24 | 0.00000460% | 0.00009360% | +| 25 | 0.00000210% | 0.00005190% | +| 26 | 0.00000090% | 0.00002860% | +| 27 | 0.00000050% | 0.00001530% | +| 28 | 0.00000050% | 0.00001020% | +| 29 | 0.00000040% | 0.00000580% | +| 30 | 0.00000010% | 0.00000320% | +| 31 | 0.00000000% | 0.00000160% | +| 32 | 0.00000000% | 0.00000100% | + +--- + +## 4. Common Structure of a KEF Envelope All KEF versions' encrypted outputs follow this layout: ``` From 0bdf48167f61a576ebb44d6a330fa44fb83c6aa3 Mon Sep 17 00:00:00 2001 From: Jean Do Date: Mon, 8 Sep 2025 09:34:55 -0400 Subject: [PATCH 4/4] Adds "fail_improbable=False" optional param to kef_decrypt(), moved from original prototype in kef.unwrap(), so that callers that know context can quickly rule-out very unlikely strings that would otherwise result in prompting for "Decrypt?"... based on non-uniform distribution of bytes in the payload. example) An ascii string -- or even UTF-8 string, is "almost" never a KEF. --- .../encryption/kef-specifications.en.md | 125 +++++++++--------- src/krux/kef.py | 9 -- src/krux/pages/encryption_ui.py | 11 +- tests/pages/test_encryption_ui.py | 29 ++++ tests/test_kef.py | 2 +- 5 files changed, 104 insertions(+), 72 deletions(-) diff --git a/docs/getting-started/features/encryption/kef-specifications.en.md b/docs/getting-started/features/encryption/kef-specifications.en.md index 122094bb8..c68316c45 100644 --- a/docs/getting-started/features/encryption/kef-specifications.en.md +++ b/docs/getting-started/features/encryption/kef-specifications.en.md @@ -3,7 +3,7 @@ ...`The K stands for "KEF"` --anon -## Motivation +## 1. 
Motivation In the autumn of 2023, during the lead-up to **krux release 23.09.0**, contributors proposed a method of encrypting bip39 mnemonics that could be stored in SPI-flash, on sdcard, and/or exported to QR. Regarding the encrypted-mnemonic QR format: the layout proposed was interesting as an extensible, lite-weight, self-describing envelope that has been appreciated by users ever since. @@ -13,7 +13,7 @@ This specification, and its accompanying implementation and test-suite are the r Above all, this specification aims to be supported by as many projects as would consider adopting it, so that users are not "locked" into a particular project when recovering their secrets. Corrections and refinement to, and scrutiny of this specification are appreciated. Proposals for more `versions` are welcome, provided they offer "value" to the user and fit within the scope of this system. Once released, because it cannot be known how many KEF envelopes may exist in-the-wild, changes to any particular version must remain backwards compatible for decryption. Adopting implementations are free to support any KEF versions they wish to support, for decryption-only or for both encryption and decryption -- with the expectation that claims-of-support made are clear and precise about what is supported. -## Overview +## 2. Overview This system encrypts arbitrary plaintext into a versioned, self-describing, **KEF envelope** which includes: @@ -54,7 +54,7 @@ Authentication has three forms: * if `auth` will be encrypted with plaintext, hidden, it is `sha256(plaintext)` * if `auth` will be appended to ciphertext, exposed, it is `sha256(version || IV || plaintext || derived-k)` -## Generalizations Regarding Implementation +## 3. Generalizations Regarding Implementation It is expected that any implementation can decrypt a KEF envelope that was created by itself on the same device. 
Implementations are asked to make their "best-effort" to be capable of decrypting KEF envelopes for versions they support which were created by other implementations or on other devices -- but this will not always be possible. Decrypting large KEF envelopes on severely constrained devices, or ones created with flawed implementations is unrealistic. Therefore, in such cases it is the responsibility of the user to find an implementation and device capable of decrypting their KEF envelope, or to have a non-KEF form of recovery. @@ -75,66 +75,11 @@ It is expected that any implementation can decrypt a KEF envelope that was creat * **On modes that require IV or Nonce** Take precautions to ensure that this value is random and not reused. ie: Natural entropy captured from camera sensor (user validated and/or analyzed to ensure sensor is working / high entropy). -* **On common `bytes` encodings** Outside the strict scope that **KEF envelopes are a format of bytes** but related to this topic: implementations may be presented with encoded strings that are likely to represent bytes. For instance: base64, base43 (from electrum), base32, or hex might be representations of a KEF envelope that was previously encoded for transport. As you continue reading, it will become clear that with any bytestring, one may recognize a KEF envelope by: - 1. reading the first byte as an integer `len_id`, - 2. jumping that many bytes, over the `id`, to read the next byte as an integer `version`, - 3. if that version represents a known and supported KEF version, then the rest of the envelope may be parsed via that version's KEF rules. - 4. if parsing succeeds without errors, it is likely to be a KEF envelope and a decryption user-interface should be offered to the user. - While the user likely knows, the process instance of a KEF implementation will learn definitively, only AFTER a successful decryption, that a bytestring was indeed a KEF envelope. 
If at any point along this process, an implementation finds that `version` is unknown/disabled, or if parsing fails, the expected action is NOT TO RAISE SPECIFIC ERRORS regarding this inspection. Rather, the appropriate action is to assume it was not a KEF envelope and to treat the data under another context: ie: "Unknown". Similarly, as mentioned above, being vague about errors during decryption implies that "Failed!" may be a sufficient response for any error, instead of leaking to a potential attacker specific details about the failure. - * **On Iterations** Consider that users may want to decrypt KEF envelopes on various resource-constrained devices. There is a minimum 10,000 iterations imposed in any KEF envelope (a value of 1 would be 10,000 pbkdf2_hmac iterations), and the maximum could be as high as 100,000,000 (a value of 10,000), but depending on the device used, 500,000 might be too high. Also, since the user-supplied `key` is stretched by this value, consider offering a range to users -- then adding a small `delta` as extra bits of entropy to derive different AES-256 keys that would otherwise be the same in the event the user re-uses the same `key`, `id` and `iterations` when creating many KEF envelopes. * **On truncated Authentication** At first glance it may be concerning that `auth` bytes for many versions have been truncated and are trivially "weak". Note that KEF's use-case for authentication is to validate that the user has correctly entered their decryption `key`. In the worse case, "false-authenticated" success will occur at a rate of 1:16M (or 1:4B for others) if using an incorrect decryption `key`; similar if an attacker has modified the KEF envelope. In these "false-authenticated" success cases, data will result from decryption, but that data will NOT be the original secret or plaintext; it will be of no value. 
- -### On Identifying a KEF Envelope - - As noted previously, only the owner of a KEF envelope, having the correct decryption key, can be certain that any string of bytes is indeed a KEF envelope. Because "auth" bytes are truncated, even a KEF implementation that successfully parses and decrypts an envelope without errors may not know if the plaintext was the original secret or "useless" bytes which resulted in validated decryption under another key for the same truncated `auth` bytes. - - This trait, whether deemed a desirable "feature" or an undesirable "bug" should be carefully considered by implementations that deal with KEF envelopes. In particular, deciding "when" to offer a "Decrypt?" user-interface may be troublesome for larger byte-strings that can be confused as KEF envelopes. There are three obvious strategies to approach any datum in the context that it might be a KEF envelope: - - * **Explicitely as KEF**: where the user has been offered to open a KEF envelope and then the data is loaded -- the application can parse and decrypt assuming that the data is a KEF envelope and would fail if something goes wrong in parsing or decryption. - - * **Fall-back to KEF**: where the user has been offered to load data in a particular context -- the application can treat the data within context and if an error occurs, then fallback to checking if the data might be KEF, offer to "Decrypt?" and then treat the resulting plaintext in the original context. - - * **KEF in Priority**: where the user has been offered to load data in a particular context -- the application can try to parse as a KEF envelope, offering to "Decrypt?" and treating the resulting plaintext in the original context, or if parsing fails (not KEF), treating the original data in context. - -NOTE: in the latter two strategies, each application may consider the context of data which is about to be loaded. 
If an application is expecting ascii or unicode data, it may be appropriate to use either one since ruling-out KEF could quickly be done via a successful decoding step. When the application is expecting to load binary data, then it is most likely a better strategy to use **Fall-back to KEF** whenever parsing data in context would fail early and efficiently, so that the loosely defined KEF envelope is parsed after plaintext parsing fails. - -In all above cases, because KEF is loosely defined, random byte-strings and even ascii or unicode byte-strings, may be mistaken as KEF envelopes. Probabilities for false-identification of a KEF evelope, and mitigation thereof will be discussed below. - -Because the first byte of a KEF envelope is the `len_id` byte -- defining the length of the user-defined ID (also the PBKDF2 salt), any byte value between 0 and 252 is valid(253, 254, and 255 are reserved for future use). There is a 252:256 probability, 98.44% that any byte-string will require further processing for KEF identification by jumping `len_id` +1 bytes to look for a valid `version` byte. - -Because there are currently 12 KEF versions, certainly more in the future, there is currently a 12:256 probability, or 4.69% chance that this byte-string will require further processing -- by parsing the rest of the envelope. - -Because iterations are the next 3 bytes and there is no explicit maximum limitation on the iterations value (besides what can fit in 3 bytes and what is reasonably useful on any particular device -- likely in the 100s of thousands) any values between 1 and 16M would be a valid KEF iterations value. An implementation, knowing that it runs on devices that can reasonably stretch a key no-more than 600K PBKDF2 iterations may decide that a byte-string with iterations values above that limit is NOT a KEF envelope worth further processing. 
Still, because KEF envelopes may be created or decrypted on various devices capable of higher PBKDF2 iterations, it is not recommended that an iterations maximum is used for ruling-out a KEF envelope (besides the invalid 0 iterations value); 100% of non-zero KEF iterations values are valid. - -Less than 5% percent of most byte-strings will require further processing to rule-out that they are KEF envelopes, and some that are not may still be identified as plausibly KEF, requiring to prompt the user to "Decrypt?". For block modes (ECB and CBC) the length of the ciphertext (apart from IV and `auth`) may be checked for alignment to 16 bytes, but for stream modes (GCM and CTR) only a minimum length may be checked (IV/Nonce + `auth` + at least 1 byte of ciphertext). Further Strategies for mitigating false-identification as a KEF envelope follow: - -Note in cases where processing has continued this far, that what remains of the remaining byte-string is the cipher payload. This field contains: the optional random IV or nonce, the AES ciphertext, and truncated auth bytes (either sha256 hash digest or internal MAC digest for mode GCM). These bytes should resemble a pseudo-random uniform distribution. It should be rare that they are ASCII or UTF-8 decodable; if so, vanishingly KEF-plausible as the size of the payload increases. - -Below, we'll consider decoding probabilities for the smallest of KEF payloads (for GCM and CTR) which may be no smaller than 17 bytes (12 bytes of random IV/nonce + 1 byte of ciphertext + 4 bytes of truncated auth), as well as larger payloads up to 32 bytes. - -ASCII and UTF-8 decodable for 1-Billion random samples - -| len_cpl | Prob. ASCII | Prob. 
UTF-8 | -|---------|---------------|---------------| -| 17 | 0.00075990% | 0.00532110% | -| 18 | 0.00037160% | 0.00300620% | -| 19 | 0.00018340% | 0.00168700% | -| 20 | 0.00008920% | 0.00094590% | -| 21 | 0.00004550% | 0.00053940% | -| 22 | 0.00002030% | 0.00029950% | -| 23 | 0.00001010% | 0.00017110% | -| 24 | 0.00000460% | 0.00009360% | -| 25 | 0.00000210% | 0.00005190% | -| 26 | 0.00000090% | 0.00002860% | -| 27 | 0.00000050% | 0.00001530% | -| 28 | 0.00000050% | 0.00001020% | -| 29 | 0.00000040% | 0.00000580% | -| 30 | 0.00000010% | 0.00000320% | -| 31 | 0.00000000% | 0.00000160% | -| 32 | 0.00000000% | 0.00000100% | +* **On common `bytes` encodings** KEF does not explicitly define how a KEF envelope is encoded for storage or transport, however each implementation will need to deal with encodings. This topic is further discussed below in the section **On Identifying a KEF envelope**. --- @@ -470,7 +415,7 @@ k: pbkdf2_hmac_sha256(K, id, i) --- -#### Summary Table +## 5. KEF Versions Summary Table | Ver | Name | Mode | IV | Padding | Compress | Authentication Method | Auth | Intended Use Case | |-----|------------|------|----|---------|----------|------------------------|-------------|-------------------------| @@ -489,7 +434,7 @@ k: pbkdf2_hmac_sha256(K, id, i) --- -## KEF Implementation Concepts +## 6. KEF Implementation Concepts Using examples from, and as an introduction to the reference [KEF implementation](https://github.com/selfcustody/krux/blob/develop/src/krux/kef.py), we'll quickly cover some basic concepts that may be helpful in getting started with your own KEF implementation. @@ -514,3 +459,61 @@ After encryption, you'll need to **wrap** the `id`, `version`, `iterations` and ### On Further Encoding KEF Envelopes Outside the scope of this specification on KEF envelopes, which are strings of bytes, implementations will surely need to make choices about encoding/decoding schemes. 
Whether for QR transport, copy-pasting into messages, embedding into json documents, in-plain-sight within other document formats, or persisted in binary files, these choices are left to implementors. + +## 7. On Identifying a KEF Envelope + +Outside the strict scope that **KEF envelopes are a format of bytes** but related to this topic: implementations may be presented with encoded strings that are likely to represent bytes. For instance: base64, base43 (from electrum), base32, or hex might be representations of a KEF envelope that was previously encoded for transport. As you continue reading, it will become clear that with any bytestring, one may recognize a KEF envelope by: + +1. reading the first byte as an integer `len_id`, +2. jumping that many bytes, over the `id`, to read the next byte as an integer `version`, +3. if that version represents a known and supported KEF version, then the rest of the envelope may be parsed via that version's KEF rules. +4. if parsing succeeds without errors, it is likely to be a KEF envelope and a decryption user-interface should be offered to the user. + + While the user likely knows, the process instance of a KEF implementation will learn definitively -- only AFTER a successful decryption, that a bytestring was likely a KEF envelope. If at any point along this process, an implementation finds that `version` is unknown/disabled, or if parsing fails, the expected action is NOT TO RAISE SPECIFIC ERRORS regarding this inspection. Rather, the appropriate action is to assume it was not a KEF envelope and to treat the data under another context: ie: "Unknown". Similarly, as mentioned previously, being vague about errors during decryption implies that "Failed!" may be a sufficient response for any error, instead of leaking to a potential attacker specific details about the failure. 
+ +Because "auth" bytes are truncated, even a KEF implementation that successfully parses and decrypts an envelope without errors may not know if the plaintext was the original secret or "useless" bytes which resulted in validated decryption under another key for the same truncated `auth` bytes. The most valuable "authentication" of a KEF envelope often exists outside of KEF, because the decrypted secret is of some value to the owner, i.e. a mnemonic, or passphrase, or descriptor which recovers their wallet. + +This trait, whether deemed a desirable "feature" or an undesirable "bug", should be carefully considered by implementations that deal with KEF envelopes. In particular, deciding "when" to offer a "Decrypt?" user-interface may be troublesome for larger byte-strings that can be confused as KEF envelopes. There are three obvious strategies to approach any datum in the context that it might be a KEF envelope: + +* **Explicitly as KEF**: where the user has been offered to open a KEF envelope and then the data is loaded -- the application can parse and decrypt assuming that the data is a KEF envelope and would fail if something goes wrong in parsing or decryption. + +* **Fall-back to KEF**: where the user has been offered to load data in a particular context -- the application can treat the data within context and if an error occurs, then fall back to checking if the data might be KEF, offer to "Decrypt?" and then treat the resulting plaintext in the original context. + +* **KEF in Priority**: where the user has been offered to load data in a particular context -- the application can try to parse as a KEF envelope, offering to "Decrypt?" and treating the resulting plaintext in the original context, or if parsing fails (not KEF), treating the original data in context. + +NOTE: in the latter two strategies, each application may consider the context of data which is about to be loaded.
If an application is expecting ascii or unicode data, it may be appropriate to use either one since ruling-out KEF could quickly be done via a successful decoding step. When the application is expecting to load binary data, then it is most likely a better strategy to use **Fall-back to KEF** whenever parsing data in context would fail early and efficiently, so that the loosely defined KEF envelope is parsed after plaintext parsing fails. + +In all above cases, because KEF is loosely defined, random byte-strings and even ascii or unicode byte-strings, may be mistaken as KEF envelopes. Probabilities for false-identification of a KEF envelope, and mitigation thereof, will be discussed below. + +Because the first byte of a KEF envelope is the `len_id` byte -- defining the length of the user-defined ID (also the PBKDF2 salt), any byte value between 0 and 252 is valid (253, 254, and 255 are reserved for future use). There is a 253:256 probability (98.83%) that any byte-string will require further processing for KEF identification by jumping `len_id` +1 bytes to look for a valid `version` byte. + +Because there are currently 12 KEF versions, certainly more in the future, there is currently a 12:256 probability, or 4.69% chance that this byte-string will require further processing -- by parsing the rest of the envelope. + +Because iterations are the next 3 bytes and there is no explicit maximum limitation on the iterations value (besides what can fit in 3 bytes and what is reasonably useful on any particular device -- likely in the 100s of thousands) any values between 1 and 16M would be a valid KEF iterations value. An implementation, knowing that it runs on devices that can reasonably stretch a key no more than 600K PBKDF2 iterations, may decide that a byte-string with iterations values above that limit is NOT a KEF envelope worth further processing.
Still, because KEF envelopes may be created or decrypted on various devices capable of higher PBKDF2 iterations, it is not recommended that an iterations maximum is used for ruling-out a KEF envelope (besides the invalid 0 iterations value); 100% of non-zero KEF iterations values are valid. + +Fewer than 5% of byte-strings will require further processing to rule-out that they are KEF envelopes, and some that are not may still be identified as plausibly KEF, requiring a "Decrypt?" prompt to the user. For block modes (ECB and CBC) the length of the ciphertext (apart from IV and `auth`) may be checked for alignment to 16 bytes, but for stream modes (GCM and CTR) only a minimum length may be checked (IV/Nonce + `auth` + at least 1 byte of ciphertext). Further strategies for mitigating false-identification as a KEF envelope follow: + +Note that, in cases where processing has continued this far, what remains of the byte-string is the cipher payload. This field contains: the optional random IV or nonce, the AES ciphertext, and truncated auth bytes (either sha256 hash digest or internal MAC digest for mode GCM). These bytes should resemble a pseudo-random uniform distribution. It should be rare that they are ASCII or UTF-8 decodable; if so, vanishingly KEF-plausible as the size of the payload increases. + +Below, we'll consider decoding probabilities for the smallest of KEF payloads (for GCM and CTR) which may be no smaller than 17 bytes (12 bytes of random IV/nonce + 1 byte of ciphertext + 4 bytes of truncated auth), as well as larger payloads up to 32 bytes. + +ASCII and UTF-8 decodable for 1-Billion random samples + +| len_cpl | Prob. ASCII | Prob.
UTF-8 | +|---------|---------------|---------------| +| 17 | 0.00075990% | 0.00532110% | +| 18 | 0.00037160% | 0.00300620% | +| 19 | 0.00018340% | 0.00168700% | +| 20 | 0.00008920% | 0.00094590% | +| 21 | 0.00004550% | 0.00053940% | +| 22 | 0.00002030% | 0.00029950% | +| 23 | 0.00001010% | 0.00017110% | +| 24 | 0.00000460% | 0.00009360% | +| 25 | 0.00000210% | 0.00005190% | +| 26 | 0.00000090% | 0.00002860% | +| 27 | 0.00000050% | 0.00001530% | +| 28 | 0.00000050% | 0.00001020% | +| 29 | 0.00000040% | 0.00000580% | +| 30 | 0.00000010% | 0.00000320% | +| 31 | 0.00000000% | 0.00000160% | +| 32 | 0.00000000% | 0.00000100% | diff --git a/src/krux/kef.py b/src/krux/kef.py index f624e0adb..094c6c255 100644 --- a/src/krux/kef.py +++ b/src/krux/kef.py @@ -502,15 +502,6 @@ def unwrap(kef_bytes): raise ValueError("Ciphertext is not aligned") if len(payload) < extra + 1: raise ValueError("Ciphertext is too short") - - # TODO: consider better probabilistic strategies to rule-out KEF identification - # cipher payload should appear random - if len(payload) > 32: - upper128 = len([x for x in payload if x > 127]) - if upper128 == 0: - raise ValueError("Payload is ascii") - if upper128 < len(payload) * 0.001: - raise ValueError("Distribution of payload not uniform") return (id_, version, iterations, payload) diff --git a/src/krux/pages/encryption_ui.py b/src/krux/pages/encryption_ui.py index b6c3e9be4..88f8c6736 100644 --- a/src/krux/pages/encryption_ui.py +++ b/src/krux/pages/encryption_ui.py @@ -49,7 +49,7 @@ ENCRYPTION_KEY_MAX_LEN = 200 -def decrypt_kef(ctx, data): +def decrypt_kef(ctx, data, fail_improbable=False): """finds kef-envelope and returns data fully decrypted, else ValueError""" from binascii import unhexlify from krux.baseconv import base_decode, hint_encodings @@ -104,6 +104,15 @@ def decrypt_kef(ctx, data): if not kef_envelope.parse(data): raise ValueError(err) + # TODO: consider better probabilistic strategies to rule-out KEF identification + # cipher payload 
should appear random + if fail_improbable: + upper128 = len([x for x in kef_envelope.ciphertext if x > 127]) + if upper128 == 0: + raise ValueError("Payload is ascii") + if upper128 < len(kef_envelope.ciphertext) * 0.1: + raise ValueError("Distribution of payload hardly uniform") + # unpack as many kef_envelopes as there may be while True: data = kef_envelope.unseal_ui() diff --git a/tests/pages/test_encryption_ui.py b/tests/pages/test_encryption_ui.py index 92f83abff..0a8b3ef1c 100644 --- a/tests/pages/test_encryption_ui.py +++ b/tests/pages/test_encryption_ui.py @@ -843,6 +843,35 @@ def test_decrypt_kef_offers_decrypt_ui_appropriately(m5stickv, mocker): ctx.display.to_lines.assert_not_called() +def test_decrypt_kef_with_fail_improbable(m5stickv, mocker): + from krux.pages.encryption_ui import decrypt_kef + from krux.input import BUTTON_PAGE_PREV + + # if fail_improbable: avoids decryption if cipherpayload is ascii + false_kef = s = b"\x07not KEF\x05\x00\x00\x01I am NOT ciphertext" + ctx = create_ctx(mocker, []) + with pytest.raises(ValueError, match="Payload is ascii"): + decrypt_kef(ctx, false_kef, fail_improbable=True) + assert ctx.input.wait_for_button.call_count == 0 + + # ...also fails if less than 10% of payload bytes are above 0x7f + false_kef = false_kef[:-1] + b"\x80" + ctx = create_ctx(mocker, []) + with pytest.raises(ValueError, match="Distribution of payload hardly uniform"): + decrypt_kef(ctx, false_kef, fail_improbable=True) + assert ctx.input.wait_for_button.call_count == 0 + + # but will prompt "Decrypt?" if >10% of payload bytes are above 0x7f + false_kef = false_kef[:-2] + b"\x80\xff" + ctx = create_ctx(mocker, [BUTTON_PAGE_PREV]) + with pytest.raises(ValueError, match="Not decrypted"): + decrypt_kef(ctx, false_kef, fail_improbable=True) + assert ctx.input.wait_for_button.call_count == 1 + ctx.display.to_lines.assert_called_with( + "KEF Encrypted (19 B)\nID: not KEF\nVersion: AES-ECB\nPBKDF2 iter.: 10000\n\nDecrypt?" 
+ ) + + def test_prompt_for_text_update_dflt_via_yes(m5stickv, mocker): from krux.pages.encryption_ui import prompt_for_text_update from krux.input import BUTTON_ENTER diff --git a/tests/test_kef.py b/tests/test_kef.py index 6e39ee420..62ab3539e 100644 --- a/tests/test_kef.py +++ b/tests/test_kef.py @@ -935,7 +935,7 @@ def i2b(an_int): # mock values so that iterations will be at bytes[6:9] id_ = b"test" version = 0 - ciphertext = bytes([x*8 for x in range(32)]) + ciphertext = bytes([x * 8 for x in range(32)]) # if (iterations % 10000 == 0) they are serialized divided by 10000 for iterations in (ten_k, ten_k * 10, ten_k * 50, ten_k * ten_k):