def format_ips_for_azure_storage(raw_cidr_list: List[str]) -> List[str]:
    """
    Return IP rule values in the form Azure Storage Firewall accepts.

    IPv4 ``/31`` and ``/32`` CIDRs are expanded into their individual
    addresses (two and one respectively). Every other entry is returned
    exactly as it was given. An entry without a prefix (e.g. ``"1.2.3.4"``)
    parses as a ``/32`` and therefore comes back as the bare address.

    Only IPv4 networks are expanded. An IPv6 network whose prefix length is
    31 or 32 covers 2**97 / 2**96 addresses, so enumerating it would never
    finish; IPv6 entries are passed through untouched. Storage network
    rules are IPv4-only, so callers should filter IPv6 out beforehand.

    Args:
        raw_cidr_list: CIDR or bare-address strings.

    Returns:
        A new list in input order. Duplicates are not removed.

    Raises:
        ValueError: If an entry cannot be parsed as an IP network.
    """
    cleaned_ips: List[str] = []
    for raw_cidr in raw_cidr_list:
        # strict=False tolerates host bits in the input (e.g. "10.0.0.1/31").
        net = ipaddress.ip_network(raw_cidr, strict=False)
        # Guard on the address family: prefixlen 31/32 is a tiny range only
        # for IPv4. For IPv6 it is astronomically large.
        if net.version == 4 and net.prefixlen >= 31:
            cleaned_ips.extend(str(addr) for addr in net)
        else:
            cleaned_ips.append(raw_cidr)

    return cleaned_ips
If the Databricks feed contains an IPv6 entry it will fail the API call; filter the feed or strip IPv6 before applying. ### Step 4: Managed Identity Permissions (least privilege)