From f1fec77049bcf66fb5bca4825a7869218cd79d76 Mon Sep 17 00:00:00 2001
From: pclee-demo
Date: Tue, 21 Apr 2026 13:49:34 +1000
Subject: [PATCH] fix: align mask_amount_rounded with function registry and add missing PII maskers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename mask_amount_round → mask_amount_rounded (canonical name in
  function_registry.json)
- Fix return type from RETURNS STRING to RETURNS DECIMAL(18,2) and remove $
  prefix that broke DECIMAL/DOUBLE columns at runtime
- Add mask_email, mask_phone, mask_address functions (ported from retail.yaml)
  so the LLM has matching functions for tagged PII columns
- Add Phone Number and Customer Address identifiers
- Update prompt_overlay to reflect all changes

Co-authored-by: Isaac
---
Reviewer notes (text between the separator above and the first "diff --git"
line is not part of the commit message):
  * The line structure of this patch was restored. Hunk sizes were re-counted
    and agree with every hunk header and with the diffstat below
    (66 insertions, 6 deletions).
  * Leading indentation inside the hunks was reconstructed from the YAML
    structure (2-space nesting). TODO: confirm against
    shared/industries/financial_services.yaml before applying; if context
    lines do not match, try `git apply --ignore-whitespace`.

 shared/industries/financial_services.yaml | 72 +++++++++++++++++++++--
 1 file changed, 66 insertions(+), 6 deletions(-)

diff --git a/shared/industries/financial_services.yaml b/shared/industries/financial_services.yaml
index b04d386..4ef49b9 100644
--- a/shared/industries/financial_services.yaml
+++ b/shared/industries/financial_services.yaml
@@ -47,7 +47,7 @@ identifiers:
       - transfer_amount
     format: "Decimal currency value"
     sensitivity: confidential
-    masking_function: mask_amount_round
+    masking_function: mask_amount_rounded
     category: transaction
 
   - name: Customer SSN
@@ -80,6 +80,28 @@ identifiers:
     masking_function: mask_email
     category: customer_pii
 
+  - name: Phone Number
+    column_hints:
+      - phone
+      - phone_number
+      - mobile
+      - contact_phone
+    format: "Various formats"
+    sensitivity: confidential
+    masking_function: mask_phone
+    category: customer_pii
+
+  - name: Customer Address
+    column_hints:
+      - address
+      - street_address
+      - mailing_address
+      - billing_address
+    format: "Free text"
+    sensitivity: confidential
+    masking_function: mask_address
+    category: customer_pii
+
 masking_functions:
   - name: mask_account_last4
     signature: "mask_account_last4(acct STRING) RETURNS STRING"
@@ -114,13 +136,13 @@ masking_functions:
         ELSE '[REDACTED]'
       END
 
-  - name: mask_amount_round
-    signature: "mask_amount_round(amount DOUBLE) RETURNS STRING"
+  - name: mask_amount_rounded
+    signature: "mask_amount_rounded(amount DECIMAL(18,2)) RETURNS DECIMAL(18,2)"
     comment: "Transaction amount — round to nearest thousand for non-privileged users"
     body: |
       CASE
         WHEN amount IS NULL THEN NULL
-        ELSE CONCAT('$', CAST(ROUND(amount, -3) AS STRING))
+        ELSE ROUND(amount, -3)
       END
 
   - name: mask_ssn_last4
@@ -144,6 +166,37 @@ masking_functions:
         ELSE '[REDACTED]'
       END
 
+  - name: mask_email
+    signature: "mask_email(email STRING) RETURNS STRING"
+    comment: "Email address — mask local part, preserve domain for analytics"
+    body: |
+      CASE
+        WHEN email IS NULL THEN NULL
+        WHEN INSTR(email, '@') > 0 THEN
+          CONCAT(LEFT(SUBSTRING_INDEX(email, '@', 1), 1), '****@', SUBSTRING_INDEX(email, '@', -1))
+        ELSE '[REDACTED]'
+      END
+
+  - name: mask_phone
+    signature: "mask_phone(phone STRING) RETURNS STRING"
+    comment: "Phone number — show last 4 digits only"
+    body: |
+      CASE
+        WHEN phone IS NULL THEN NULL
+        WHEN LENGTH(REGEXP_REPLACE(phone, '[^0-9]', '')) >= 4 THEN
+          CONCAT('***-***-', RIGHT(REGEXP_REPLACE(phone, '[^0-9]', ''), 4))
+        ELSE '[REDACTED]'
+      END
+
+  - name: mask_address
+    signature: "mask_address(addr STRING) RETURNS STRING"
+    comment: "Street address — full redaction for financial services compliance"
+    body: |
+      CASE
+        WHEN addr IS NULL THEN NULL
+        ELSE '[ADDRESS REDACTED]'
+      END
+
   - name: filter_aml_compliance
     signature: "filter_aml_compliance() RETURNS BOOLEAN"
     comment: "AML row filter — only compliance and fraud teams see all rows"
@@ -197,7 +250,7 @@ prompt_overlay: |
 
   **Transaction Data:**
   - Transaction Amount: Decimal currency. Columns: `transaction_amount`, `txn_amount`, `amount`.
-    Use `mask_amount_round` — round to nearest thousand for non-privileged users.
+    Use `mask_amount_rounded` — round to nearest thousand for non-privileged users.
 
   **Customer PII:**
   - SSN: 9 digits. Columns: `ssn`, `social_security`, `tax_id`.
@@ -206,14 +259,21 @@ prompt_overlay: |
     Use `mask_name` — show first initial only.
   - Customer Email: Columns: `customer_email`, `email_address`.
     Use `mask_email` — mask local part, keep domain.
+  - Phone Number: Columns: `phone`, `phone_number`, `contact_phone`.
+    Use `mask_phone` — show last 4 digits only.
+  - Customer Address: Columns: `address`, `street_address`, `billing_address`.
+    Use `mask_address` — full redaction.
 
   **Available Financial Services Masking Functions:**
   - `mask_account_last4(acct STRING) RETURNS STRING` — last 4 digits visible
   - `mask_routing(rtn STRING) RETURNS STRING` — last 4 digits visible
   - `mask_card_last4(card STRING) RETURNS STRING` — PCI compliant, last 4 digits
-  - `mask_amount_round(amount DOUBLE) RETURNS STRING` — rounded to nearest thousand
+  - `mask_amount_rounded(amount DECIMAL(18,2)) RETURNS DECIMAL(18,2)` — rounded to nearest thousand
   - `mask_ssn_last4(ssn STRING) RETURNS STRING` — last 4 digits visible
   - `mask_name(name STRING) RETURNS STRING` — first initial only
+  - `mask_email(email STRING) RETURNS STRING` — first initial + domain visible
+  - `mask_phone(phone STRING) RETURNS STRING` — last 4 digits visible
+  - `mask_address(addr STRING) RETURNS STRING` — full redaction
 
   **Suggested Group Structure:**
   - `fraud_team` / `compliance_officer`: Full access to all data (AML/SOX compliance)