From fceda95abd6aca9d777ab1815e70a86aee7640df Mon Sep 17 00:00:00 2001 From: diego lopes Date: Mon, 4 May 2026 15:05:06 +0200 Subject: [PATCH] Adding unity catalog skills --- manifest.json | 37 ++++++--- skills/databricks-unitycatalog/SKILL.md | 49 +++++++++++ .../agents/openai.yaml | 7 ++ .../assets/databricks.png | Bin 0 -> 15366 bytes .../assets/databricks.svg | 3 + .../references/access-control.md | 75 +++++++++++++++++ .../references/ai-ml-objects.md | 62 ++++++++++++++ .../references/lineage-and-observability.md | 66 +++++++++++++++ .../references/namespace-and-objects.md | 76 ++++++++++++++++++ .../references/operations-and-migration.md | 66 +++++++++++++++ .../references/storage-and-connections.md | 60 ++++++++++++++ .../references/volumes.md | 47 +++++++++++ 12 files changed, 539 insertions(+), 9 deletions(-) create mode 100644 skills/databricks-unitycatalog/SKILL.md create mode 100644 skills/databricks-unitycatalog/agents/openai.yaml create mode 100644 skills/databricks-unitycatalog/assets/databricks.png create mode 100644 skills/databricks-unitycatalog/assets/databricks.svg create mode 100644 skills/databricks-unitycatalog/references/access-control.md create mode 100644 skills/databricks-unitycatalog/references/ai-ml-objects.md create mode 100644 skills/databricks-unitycatalog/references/lineage-and-observability.md create mode 100644 skills/databricks-unitycatalog/references/namespace-and-objects.md create mode 100644 skills/databricks-unitycatalog/references/operations-and-migration.md create mode 100644 skills/databricks-unitycatalog/references/storage-and-connections.md create mode 100644 skills/databricks-unitycatalog/references/volumes.md diff --git a/manifest.json b/manifest.json index 54ec72f..3553b78 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,12 @@ { "version": "2", - "updated_at": "2026-04-30T11:02:41Z", + "updated_at": "2026-05-04T13:00:55Z", "skills": { "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps 
development and deployment (evaluates analytics vs synced tables data access)", "experimental": false, - "updated_at": "2026-04-30T11:00:26Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -33,7 +33,7 @@ "version": "0.1.0", "description": "Core Databricks skill for CLI, auth, and data exploration", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-04T12:38:42Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -48,7 +48,7 @@ "version": "0.0.0", "description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -66,7 +66,7 @@ "version": "0.1.0", "description": "Databricks Jobs orchestration and scheduling", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -78,7 +78,7 @@ "version": "0.1.0", "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API", "experimental": false, - "updated_at": "2026-04-30T11:02:37Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -93,7 +93,7 @@ "version": "0.1.0", "description": "Databricks Model Serving endpoint management", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -105,7 +105,7 @@ "version": "0.1.0", "description": "Databricks Pipelines (DLT) for ETL and streaming", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -152,7 +152,7 @@ "version": "0.1.0", "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and 
concrete fixes", "experimental": false, - "updated_at": "2026-04-24T15:10:23Z", + "updated_at": "2026-04-30T11:19:36Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -164,6 +164,25 @@ "references/networking-and-security.md", "references/streaming-migration.md" ] + }, + "databricks-unitycatalog": { + "version": "0.1.0", + "description": "Unity Catalog governance: discovery, grants, volumes, external locations, lineage, and UC-managed objects", + "experimental": false, + "updated_at": "2026-05-04T12:42:21Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg", + "references/access-control.md", + "references/ai-ml-objects.md", + "references/lineage-and-observability.md", + "references/namespace-and-objects.md", + "references/operations-and-migration.md", + "references/storage-and-connections.md", + "references/volumes.md" + ] } } } diff --git a/skills/databricks-unitycatalog/SKILL.md b/skills/databricks-unitycatalog/SKILL.md new file mode 100644 index 0000000..2cc7822 --- /dev/null +++ b/skills/databricks-unitycatalog/SKILL.md @@ -0,0 +1,49 @@ +--- +name: databricks-unitycatalog +description: "Unity Catalog governance operations: discovery, grants, volumes, external locations, and UC object workflows." +compatibility: Requires databricks CLI (>= v0.292.0) +metadata: + version: "0.1.0" +parent: databricks-core +--- + +# Databricks Unity Catalog + +**FIRST**: Use the parent `databricks-core` skill for CLI basics, authentication, and profile selection. + +Use this skill for Unity Catalog governance and day-2 operations: namespaces and objects, discovery, grants/privileges, volumes, external locations, storage credentials, lineage/observability, and UC-managed AI/ML objects. 
+ +## Required Reading by Task + +| Task | READ BEFORE proceeding | +|------|------------------------| +| Discover catalogs/schemas/tables; search metadata | [Namespace & discovery](references/namespace-and-objects.md) | +| Grants, privileges, ownership/MANAGE, RLS/CLS | [Access control](references/access-control.md) | +| Read/write files via Volumes | [Volumes](references/volumes.md) | +| External locations, storage credentials, federation, sharing | [Storage & connections](references/storage-and-connections.md) | +| Lineage, tags, audit logs, cost attribution | [Lineage & observability](references/lineage-and-observability.md) | +| Maintenance, time travel, migration, constraints, clone | [Operations & migration](references/operations-and-migration.md) | +| Models, functions, vector search, feature tables | [AI & ML objects](references/ai-ml-objects.md) | + +## Priorities (P1 → P3) + +- **P1**: Access control (grants/privileges), volumes + external locations, and metadata discovery (`information_schema`) +- **P2**: Lineage/observability (tags, audit logs), federation/sharing patterns, and operational best practices +- **P3**: Billing and cost attribution patterns (system tables) + +## Key gotchas (do not skip) + +- **CLI args**: many UC list/get commands use **positional** arguments (see parent `databricks-core` quick reference). +- **File privileges**: **`WRITE FILES` requires `READ FILES`** (common cause of confusing permission errors). +- **Discovery without data**: `BROWSE` enables seeing objects without reading table data. +- **Ownership vs MANAGE**: these are not interchangeable; confirm which is required for the operation. 
+ +## Reference Guides + +- [Namespace & discovery](references/namespace-and-objects.md) +- [Access control](references/access-control.md) +- [Volumes](references/volumes.md) +- [Storage & connections](references/storage-and-connections.md) +- [Lineage & observability](references/lineage-and-observability.md) +- [Operations & migration](references/operations-and-migration.md) +- [AI & ML objects](references/ai-ml-objects.md) diff --git a/skills/databricks-unitycatalog/agents/openai.yaml b/skills/databricks-unitycatalog/agents/openai.yaml new file mode 100644 index 0000000..e9e90ef --- /dev/null +++ b/skills/databricks-unitycatalog/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Databricks Unity Catalog" + short_description: "UC governance: grants, volumes, external locations" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $databricks-unitycatalog for Unity Catalog governance tasks (grants, volumes, external locations, discovery)." 
diff --git a/skills/databricks-unitycatalog/assets/databricks.png b/skills/databricks-unitycatalog/assets/databricks.png new file mode 100644 index 0000000000000000000000000000000000000000..263fe98b84e8ff3516edc93e7c99230fb8fb3113 GIT binary patch literal 15366 zcmeHuwGvL_%Pi2UiaF2#a{7SAsT9mWLN30;^Ey{$?uT3zOpOTyHdK{j^8hhfvh5NwU}M^})*R($^?%1Z)$O zv6^?FX-TiMYx?H}6GoR%4rDx|NW8K`Drt!9hxATaunL8TRATtgiasJ(Q{PhVpMNi)U<1Ugcz_bCAU;~4{5W3% z5W$DAqRanFT@??0-Ol$8^xsbi-0mN(|4;h=odd}sIzOAK*$SI5KK$-&e7Gh)_%Zq+ z5gXOfSo)LD>!=#p3^9Dt*&Pw%L%YAZ7t5}5PezRE``s=U8v*!45K%_IS;-gEhzPwMWE=_QBR8TZ%nf|E~Ns4QQ zo-p>@gQ2AH!E-$3T;wb(ezjD?`9$2^z1i=E37uyZNW?>m-ne;7dKS-L%BhhsVf62=-{0M$lV7G(h{>xM zez-sySdqqLiJY|~n91da9X4#Rki0qBIQOGKZ1je*!}|j9kfVf@80lyH{sW@3(_z1l zcN)%mi%w3NcbX-Gk4Oo800%y|ovstTrVu?p5xl__L2R53ty&+g{&ve(luo~h0gZ>e z(bA{wVou=Fir+@7wKdSi?VERb2G3ylXh#?yIk~zt)m#OS!OStL!k+^1MK$G=RvHg( zgrdktULkPyvC$XoA$2iXkNs#FZI%ofFf-WR+i9(wj!NpexzT%w(&)FN>)#KF}R5`TG0 zX$YOQ&K;pp=}1Dg3ZC&iq_$|tk=d2@7yXn#44k%_!^exNKj2555E?j!V1COIMuMk3;Tcp)oyUGH^1MNc-BQ^FjKlg>g6;DfuE&ezv z;K5yA!a^UgXmH^DOQVlGb3W&Y1Be=!? 
z-fFUfE`kR&fkQbYR9%=d<1n9li=TwY9GUsmI(_mcfMj}8X*QY=NwnfS_zb%x)8143 zv%uFw^=CQn)(Bc!o&aJdKl(z55WiYxQTUY*kWL8Ej<46QPtr7sS8E1eI^KfA0m8_N zf-&Q@^s141gb{Wci)J&F^S_2}J&B7uGBXWxN|6QMk96Z>J)QiXdr}C?Pu0YA z{#ZO@l}0n)X9MRz-_)f+Cco!~y!AVij|t5prz?5eGAB8My`E#Y1ZMdpUvT@nBY}L( zA)%##sCc=r(<~aLi0YFsIFxuF<7x8IoQxbJ9zKOM#F|8k)FXt@fyBR89^8+6qI{3*CXCh|Hk)~swk8o258(Z#m*~= z6x-A!ryOMHY7c8|p~-X(nL6Lr*RJ`j6bRQDZ8jPhJp6@SvD>e{i{e4tXnawv&OGY5 z-5RHFqHgZH^;jZ&c5qS@9%ZUTbuIXdIWe_-&cM9 z$D#I@?3_>X<=2RrE4j)af>`^1$90^XNa!eF(8BYLFZkeBcAAqfT z>Cvjn?v9bp*`0)+tuGwg%qWs(S(u+^a-Ov=MZoxm%ZX`X?rzGA^9(ynWqdBIdSbgC zK%Z!<9S^jWeuLwL62WMJR1;IdgG+Rw9o|y1l8eUbO|*GCCoGX3+filj{UCxNpA!nE z)-wk~zio(B_wWqXT2GG^3Nj%H&6V~c9u#uK$X{QNkuFFGoz19zfjpz5PF;`jWMHi} za0$XAu65Km;CtpFI-Tz@Lx#}gKGqx6tp?M~`7|F6d2^RSnGvO_(Z!47L&7&WevQ~L zCIEjb80bqBOn5L)eRi9F>&Y;G#55(zFBUi>`tIy^=fFGUw+|C61_C#Cy<6~k?F2X+ zNO5$Pn{-b^y%4;4X}C5U1P5dTsM8i6)w%6I8zyszCB}y#UiDe+goeX@d_A6{Pp{MK zA>knRNTgBRW6ua0u9&S^WYn6>K6^#8eH$&1(eJJ^rp2f+2p8lHC}$){!(qn#o}6gB zXN~^H;8T9d!%<#O7)kjjQ+~j}{#|UA%2UG}!=bO$yD2c_GI4T+$21Pb6DC|xJ!L$p zaw<@)){LTeSN){p4{I{`RKLhBlBcjwL9+KU$z$=I2(EPJWlsxqzM{5*o*iu~fIK}s zDZh4n1`Zl&l~m3%{EOi+rlj0Iw@$JZ`df0X#-$}!QYfmrD**OWb+l`Lk$9hyWT=j> z=Qo?ovzW$$Ata>(xFeB_eT_%o%l2RhZA0C<6Pxj?;X@MO&T*r6Gxs1EXRiOJ?1&h(wT&~VcLVkR_1Wf7YoKbRuj z#WB*CYYp}+t#UWooon{H@#iuaqMO!wjSBv%!>9${u#DECf;Gc?;V5JiMPE;|#4SsE zz1>9s+++gx1F5J<-lPyWc>q(ZZJ9s6XGQd4>;sCB)+;dUT;N-GUFL zz_~*T1ifJ;tU}2*C7SU_{;fMX9P?~ zcIp8;FZcdz(dHXzny0&2dUWNv{i0t;2|W8f-2@BY=iHn+5igkRa3=x$kR+~|QjvU# zOu0@-SrW-&GbY13sNf84ta9PD)rchGrgop_Nz@H}(l@5^I3PT!sIA&{-L)t_d5AIg zOL{HOQ#6u-B>b7h+DaTs%-7GOdJSbGhef}mj>mIA)8_OVcInN$p(o1vF0fJYvj1xP zAQgTqw8CL|GDM`Qnl!3L3MqPZqCF;a-EwlK-!I<#?R$(gP@DT%LK}l%{ORAoANXq zs&b%A?RU;JFDU`+HoFET;zje?{R5hx3Xt3Y9uX^(YV*&wk=5k&Kkp5Rk`SChQY1`U zPfonN%C7|pe|h=hHR79sA{?!S8Y!`bt_R#9Z!fdb(u=i~eunUuT~Dyl=>EmPqfwgv z@q;Nv6Qvf6geSAD8fNvOQP@3S;{LcbQd})M8IFQQ-urcE?Xmw~LtSw*7`WvC`2u+% zb1F|MBXM69dll^%JO(cRRX6fd#x^8Kc19neJkjF)uTC9L53L0r8^masTgBb*c`2Xv z29D={1zdE{nNt60# 
zIyJtpBS;B@{j_^gEfYfcQ#EN_mX#Qve>a6rfdQT}n30^@Y_sP=xik(7mThdtYX}-p zfGi7cG@SRjUA$9pzQPzJbW5j*%wOX&l5&o!T_SuNa8KQ0CTY3MXg?P9x^#F&dw;>I zMer(!;34?D+fHYlYb?D{@C9O)Fc`UlFb-WD1N$t4IGGBLd3Xh6)AF;fQqSl~|LW46 zsE0zYSpDr}Lk~Wf@|E9T**f>ye>>{lirLh~83KCwMCH>v7s@6ag;3?sAn^V2_OF`) zrCdD|Zns!9?UZ#k0VURUHOtE4Mm@#*&y>O3XalWYuP5R=&7a9qyIE)7OK;sMaCyww z*DZH1L9eUo*Y-j{$!pn?t8cK7bXc$qD}Pwd4q@7pKF(qDYi9N+O1X%QAuMM+cSC;J z-|%djb_Rl7%I%Wht3~u|=-PIozb0GCcBbZ=AFHBYXE!A4_(SV>l{0h{pJu@{82g&P z<(_!3qfb_dPI|&vmsB56v^*n=UgHZzi4D^;jx_k z=37Kd>={Qht6tx&DSW29bsk-FSUGmtRU6;kLm0-HQyjCQwGuT6_0G5Ns|Kix4;cQ~ z7#`ubv#2_tKCLDuvVrwUX8+8Q>vI-(e1P|eNI%GivWp!Z^X!ce1=+LX&96nZ+jFtH zOuoEiEBy>?CoJTaZ8t&A%ZeX+O?!ep|EqP}P=}7CVQ*sAVWDzq`x|{*SH7>E7C1Bc z<4Nxq=W|B!OEoH7j_;SC2&LU%UgHd0!Osdgjl*5+CyzzCj;Dsn0Y4yqCY=$x?l6tO zxY{AFbF)a6`xlOw%p-aNLx@Kb^!7;ZFw>7KHLXg6K0&swV(}Bj<;;jlU%6C=5SHP& z$9#5F(G1!Ss9`7RYMI}YhGcJLh%6nnAafhRrj{8;nbR<^j7?1nwxa$_lH95qVI4`> zDerlYtcm?8aHBfkPsOM}B74zuXSBX+#57C7Ibm9L2Mv9)eeug($e9CrDRz-O;sopH zB%=O1t3B$3W<%CkMj!j9e$8(vkk^jrt3FmX_r|O@8^DtMtojMs2MPUuYu(b|(^!|W z`<=7wpMzXUU)*gBR1NP$!RY-FJvuwG`*=e1-S-RIpuAl&)37`l);9c$JM~u#ev%9s zrgJr4%% z=vGGx9@5G_VB|{sj;!rryVq%-*H$dughu26=}M1!s>CMDjEO{BZ^2c+e2A2*8pNqJ z0?h{S1-2bZ#86=1zRE~=v)>2yjGrkO(G0Pg7W^q;7Ia^MH8o-=LiV(;An102C4r|H zTFeEU^i=cOBqfRKggFdZne*t17XSOBOv`JU!coA+KjSznRSzDt}wAhb%m_M#7WM9S5MzV{y zK+MWSxc%GlO>tSl(E$&35J%?{d}7Ysced+Byei%2$?bkfo+l5NXu%(2&L6`D!LT+p z8+=b;(*`h_FrcuzP(wS_%{Xr$_lHc#L*}lJ6(hdF%H6(b2I=Ip!Exi{?~7)*zZ^2p zYzVw|y-(vd88X&BQDmfH70nS%ALA9mOCaf4F*L=4i2c~8^ieyG&mLquvWjHS;qscb z1FVTL%UGHLWsUS4QC4x;(Qz}}q*H|2^`TSe1cXTsqWpCNB|^~QYS`^YFUGznM4YH( zX#0Uun3c|lIBAGT2+!Q_{i8YWWjkAt|Hb=tRd-Fb9Vqb{mL!$2qk9r<?_`Z;DkOAPJbxH0{{io= zZ~NbD@#{3Fw$(DfggF9+2jQd4+}h9OAnZwoDw3W_etDJ6Z9i0=$(PkH*DQ%v+Qt^%ZhUF4^uq^}3xX1HD4-Zs0hmHdqQw_E(DAb%ACK_$X2%lr|n z$*yFv$`;8*>%45m4I3Cq>dyRL&7*3d3!MFku9ENi$TwQ=KsZ5e zD3WWg=R6@F@*G%zklf@QP%dFJ6*2WBV4eiI$qVmKHCXo)hPz$uqLhus1y64m<#_mK zD-GF5dx>jJsx=Su#^_z#xi=CRT7I2?vT@#_W@}aVG=`A< 
z+23Ccf{eIoocMCwuCQ}B^|wp<801(@^6K{{Rf;7uOYls`%YS(P5UD5kpa{0sV`=qU z8~SpS)aqgTr}jxg^@B3sd=JClt)eg{%q>u>(>poix64Jg!)hE7zDZId^<2AJsgd=o zw`oO%xr=hHxVr_)E74D_nthfP+$zKtO0ek_X@E>1cQV8ID}M)BO$0ZxDnj}9?;AXW z48O+x`-l5u4M&?_tK)v2Hdnk|*>3Rk=CGv*FPD5T#(c3CMb02AE{}^S8Pe&;>CsPl zUZ(g1ufLY>h7gYHX*&e`8GcODJMt0N0Ku&qe7%9JBF${d{4f{xxzjn}jsk@RbBc%Z^~Idh$Ng1?r8%|+(gbII%mGEr4 zo8=&dTXzEAEYmg_Wn1mY@;iOBnso90aH4ha%N3#;R`Ph9d3D&*_!c}o+Gbi8Vuq&O zuZgTc3$RBFxjan`VR_lbKd*HdmLU4fWYzlSi^Gv7b7n4N)eKkI-wG`3rN=uQ#+*kt za$mr!G^$0ivVvR6wtD_=tXoLe$IZ``7N<1uNsG8Ez7CvdBzI-1z0FxzbW8G)nRh%F zxz+Orwp)5y=S#AxRWP- zhOvfvfAUPPHdC*c(EYa6r(TVy2=<{C$#Y{An7CExS#dE-3f~*WQ=6i8ui09uYa38C z<>KEOxXnZX>I_O-qWqItZW3M+rLZRE=x*|mC9IbE@ll8CHAs}uWdBKlN5_CyXRgDn zwR~cA{PJ3tc(SuQoBUU!f0%U#Hd`{W5`Q1Z_jD|NfTCk3 zFOxyj;j8+p`@2aelPbk^k8rCj$Q;CqD_72G!lH^&jz4SFAnnle4_kf2WQAW>$*>mj0` z|Lc^N&jZi(V2=>R4oc@h1*bMoT+y=h_A%-?3lU^a&sj{}fuwjKUU1lfn_?bQSzI`p zFj~B4FZ>0)bmT%qMdiJ& zeb*NhhHp=MvFqHDHm}-S2=XqER1A6@1dEq8p=)}>#2=9NL4Jn-hZ(!SFSQ)b;-#N< zH(C7WwL>MQC@*&&sxCW1c#3zhFCu?eTEm!HRIen3K08V@Ou z#xr5H+7?NJcDd``%Au=P`lYn<7!#aknI&!GZ71oT?=QSL)ZPDVSja$BV23nKDHu>A zHrQITFgWPEueyo-`b8zZ&hS4+AJ%xFe-ex5^{K5J#3BnX*-4XGP9w|z1EasF4 zTdIr4VY~LN7qwiZgf29itdfQ(#)PMkJ9#IjuT$SH8*;@(ZN6JGWHo5ZlA23>$OdxN zy(6;bmeWVsgw_#n9oM_FjZyI>apGo^p_eXXO(1 z?%d+4kyEcYkzRrdpFk@SFVzJ5uiSJJR{2$rp<9#Uis%xN&%7jRaP)DRmw@*Trm8Nx zgs$pxb#*3+Ygi!F>Rt9)!yF|hsLD@Ca1VM1jC+m7A`>5MFJQGA&sW~?jg=wP(~2iw z3`5%uV)uoc4wNda8ai$nw9Tvb7+*l)JokDTk%?Whrvfo~C#%`)_jL=is^=G+56UE; ze2Q!{+H}tWB@51H%hbrYu9ykFDCgL9*VRXpLJ8eo>J9l1*X_`o?{ac#F1-51m`|*$ z&S(J1o=HG5hvaL59j1(Rsf_n{P)qR>`oRQyTpxpGI4x4oyk#8F{d|-;`Fcrqi@#8Z zIb@cP<|Llz-gueSaa#LfaH}QIm@D&1e*p1D1JcaSjSjoqN^-3C9qUJfsSLn!EFOwg z_$?8tjYQrxC57|g+jFCh4NULa488p5;%L;^(*UwsO3N#zJ` zfj)@vO2S->k2XH_1jRRV6ghtqB=BG?{Q2Thpe*$PmRId^ran zPwEB6k;B(L-9BrJD=?%(d|Ii*T79R_1Gi55marduE&1ye>t49LjJk9E+*4TiEb-Tu zsk?_T5^a4!8n(`Wh0n$22hgpb^Ii%Nc~PZg703fAQge>VR1dGw3jqT~j>^jM^iV}l zeNbGMZQr7JLbe5hvG~23NSAA73XlHzt$dRC=3FP{=!%uKZ)V<~49aH~v|_}OM{iGX 
z7rKC8yOq5-xU~Ls9i`l57K#O_TxQE#mM9px)5H_`pf_@BCy)}wjk&WzD z^m24(T0Hmofs)Q71+q)Wff+0(LO^Wd2q`_QY_v=2-(EkHPZ}CF~@lw-7@Ef9S)CPLeB8hX&l}v0LsiPOe-a z9p0GvTr}x4o-O?vgts`REWB9tazJd? zoxJ((en?IFuv4Q?g*@c2pY+d%1QH<9R7|C_&`wsl%VCe8!1Gm*yU$C!isJZ*U^PSYwqEcC-4g9U7&{rbFi7NEyGwKhGkCH%upmP_j_UF4whhwK0?K%e*T0q#XDe zv+SZ4IM2n(7g^D=Ay>qwy9k4hGF6*xBfOd8i}f-r-ZShCQj^qqbd*u=|FMUYS49yc ztD?vo&g4)>bLzK0pDRLLHy;<1hgj5{I)ys;EO<+kIZ1z@#rb<%_fRxF3HbiAH*0H% zuw1zXYS-?)xMiXDZ8D=DtN2F%>JnOu4^Ig&FR@+OY-w&9=x3I|`JNbz)c^g7{r(6h z$wSD+e0hu0l2E%tonAR14ig4nMMuEKrL>ZNPIU!VSBUE5_e)mgKP9=l^^p8%Q)G8T zXL>{`lyIr=$KSNUKTe==w7#Resk75s+?CIUUcBCH`n@?&-+|jR*o!*Q!%-{Djh0X{ zV@XYCV(-{d4h4SHAVcRK@a|0RQ9~KiM(`=_Noui>&cV}~SIESk%S0e8U zqj#Rl&Gp@ z1g@xmDiB6H_hI`*wdGJ^2#Xq|qAk0wD&L#hEnfB`Z`iOiqoXOIG_baZM+`_0nC2p3 zokZfu)ay8k#22}4Diyx@M~X^bRP{+X%}^>urQE|`y7^y^q1cMlyXT=W+A*-{CXxA$ zTfMsYZ`Oge%IwYNq?PKYs`*DV2zHiPse}@4+0tFsXi5BJAyBsaQjNe)4Nf(n>d--> z9D6TMGJ~-PNHNdSP(lNDF54E+tNBP90eCKO{gZEatI+uK5BnswDQoLg*+TalaL+lD z&ScSN+~1v+M=q5v@@xP4S6CCh6YpfyW;`}cb+tXvFdax7P>d6|nvj`P+_K~syD z7BFh*T1MiY|JnC=e(!hI*Sm8JW=WEU{FBh9t`owNn4i|i?*E$2g*_nFV2cDiJdo0l zDFNlGNY`*$#E@Y%v)J&U(Q6^e>iP0Jk7GqM5-Rt3P&$_%sND)SnPErQ3p|u+2Z;c5 z+N#Uh6WkCYsyI}V+>|>1Bt;WjEj#G0F{bj!)TQfmdL$`y>T}^RZma8La^kWp5gf?( z3`1RBU(%M$0K-P z!R6AU7yHTS*?RHQ)P+x4j^>=aG}dQQLPTd%T-{9#cLf}BR%LaZ8mBl7TMHYG_P??J zJv%r_Fg^@p-F-X~%nBOTw1=iRj4)Sfn0Jfl7h$ep)QzqMPT<4v9YBA8@stcM5p0D| zvhw|ln_xv+4$v}0icOLP4kcW8FqGxQUVX$rwt(Q!MLR+YAGEQdEu(pku7!yX3#WKG zVif5Hv47%yb(^NV^n9(SFloKt!ev7WL2aPT$=e46&*do zyBv6+w|QdlXIRk_&sJ5rLqS-)w66O~F`<*VABXv{^`t_iS|;l;rcW}bzv7RY9+%UF zV7+H0446IX|CqfPTS$Oc?o6%>rx&Yrnoex_@iXk_N*t!Y>f%V25nsEOja>9>DhNh< zPaYEFp!-|UfKi2KNVd{M@d2BAsj1hK%P?0eN0!GSP|p;q<(u z6_qeJy^0&bhafPh8o?Qer*BxBNzJ}l09t}|3eq4`bdaifbdqD`#M2`_blMCL|69iB zY$UiPJL(~Ybx?HK61z>Fn7h%Pn(%8Myz}Q?kOuSsI?X_$=D=-W!wtCM5}ZnPrk4{0ajC)4SJ zzPR!8%4_TjTtpA|XfJa&LAL#vX2)zwjeCqy&DR29 zX^Y=R9hPXDuJa{Ns2O2+cv2s#o;^BswE0-D-ocbBno)-r|A>is_<3p@=*dbL8JkqZ z!9g|s%C6OJR!!iOntcCGe#f(G^&nO9!7%8{gtE>DRL5@P%Ab+o!2 
zTsw}vW>caC+A29-*URlt8beobjH2>X@HuxIy+2N4+WrFTg!~^LmKN9nJAi6mAr0y>v762K6!3Y zo2z=Q=i>1FJVvMa&q$#HXm=}2T_}EdI}pPMdgs*kwZ<=&E=;I`D_Y}TJ&TK{MeI?N zXfLb1VdvnV3?baIvxCf;NUSP?ik?;1+kT3NL-$Y4_#&Ane}W*?1XsM~C}$8^G-K?v zbQ`^W{XLVZ2Sp2K?It#JB=Gd0qR2b$!1(LqLZ`$3LSPbw-6Nyx9DBExGX~*rg@u4? zyQl8J-5*bu#XiuGO-ZuRYtzXt>8R@yOh}Hs1F)+4Vln|97xp9dkJkkZxT+iz~v+ zm0A}eFg4Yom|Bix*0r8?FZh1-JO&aOa_B(Sb4*TpEkL&}MzoNWOPLTb;W4 z&h3gAIIdoQIsW_reBs%z_&}IW>zUXCeh2Psd+}?pIX%6+9?T3=Jmj6xE!ekHS=0Bj zFOPgbT?HCo<-Y`dq>xtDQqU~Mrx%PTA@}JXN&2ZuCv5BGOi~`SnyatC38!q)Vi2zSa5XRt0-za`p{j_gY$Y24GeSx8+Ko#sq2Gh z;lYi2z~|rpWnRb2Qh&Cr(Sn^h{t3XzRsmcsydQf~8UsfPSk!#ppO5sut8oN}N+18Tpv>OZmkvH1$`;W7H8xw_x zCjZdfp+fKr;7|qw);J&8_o*Y+Yr+zgiw1sH10ZSXFQjrPjISk{q`<#+kM-ji`Hhp8 z%=gGwENHT`c8%S4)l$?ZUPUxSTw_Pujat_{QHDu=dC%q^!z}1EEB8oN*gR60skQ4s z3CL7)tDbOQ4g$q9*yw#e^CX8DdAe=)5lGcJa0y?*0TfLWroRc5+#OI_u0b0EMXeE5 z4{BP4?`^mr6nt9f&(>5xuJxS$jT4zlGny+MFrbB)+0uT<& z_Ihx!gDDCt=gR{fC2;l)v^W0%kmaW_cWr|J(bn5-ObNfwDyFYiOvPY**myCh@r1=Y z5=jfS`NmVqS&p1&4hY93CU=$xABV9_+V1hV?0`>h$8&!?8GG+8>&cELyIJ6+xywo{ z+0GM#!@R?Km%Wg5n`XwLKF1J$&trOY{Sj;w^xEz5=6cBpTA(7RZA*TFw0L=duEl0wH zUrFqKq(TUa_*>%xR9Nx!GcmOFb1QKdbcx-`@$LH8x_zgzmnhxut!T0UQVz6RJN__Iy~HR*VFb3CY$)N9_ZxOXy0R} z)lB;_Z4xgJpd&UTl%Oj7T(`&>lm|b-yJbN^LBhz^G;Z^q4b?3KAwWdq?`i1Qc*ZEz z*4j$T$4>DoGbBxK>~FvKJt?noWl^|520 zyHeOmx{b`@p}Z_4;kn9%(7W0_(N^ew@BP$o7N1B*=(OrW_THirDi5=E3q)BRemlHs zzIe%rg8d9l2y@_^<@WX1RU29T&hW6~vI;RE2w5Ujun&3ZrU>cji*W)~JRufUsCkzw z-0D<79kUIDk!$TIjYGw|%dFHUtn~%4ie;}>7wQ4L$|&%=9}{ew8c}zcxBl~N06&&Al(h)3;3cwC z7dM#!;Jf~s)h*gU>iJ5}sn!6To`7XWbUR5yfz8p?mUmXv4T((i&J?;T;)%ZebkElrYUNLnpMdKk?`fgKOli-tn z#2nkxYwT|4H}q#a2bM_=ME+5hWoVg zQ>+;wv%LeZ*Xp?qI15Aa_XP@Z7$Ea50)TJO;eGe!8bB*w*%tkU@1M6Lt3JVQEr!mK zk7n(=4Ql3^&6*oz1e7gc-eT*QI|D-dTx)lJDdjewNM6F`J8ktPLLK{OFGMbJr{tfF z1|u6mV8s*ma-^u;vmYFY{09-o#l7KAoD)1T3JG^!>xhL+sxWRN4KPn@-2gE9)zbw@ zLhvqep3wlQ|L{EKP6K9@?N!vmCmF5qYQUGvyTuw`h>}K;X(^NO9eTyzxgwthOBVXJ zXn}F>A1LKsi=KJr&4J)edfzRF2Tvj8NpMrj2s(m0Nn^+`Y`m!e!QgsLtD%x~#ZKj# 
zhIcSa5&y`Ng?{;^lFepT4p-*?p(9EL;1^*#sjG-&RPIqG)=SJ&s^(5iiV~wEWw_BN zRbaFqUS|zyQS{Zx_8?*f6du##|FsS^G^wL5Js7%4a0!j1Cm@n*R5>k)7vexA4`(jU zC6a4~i5-gq1r`&!-g=)kqq38X>4L-ag%#IAVWX&YUu4}B*ceMgWAH@I&}XMmX%J>}?KxWL=S5x-i4qRp9$&7QGPLK9Ug+`h09z!H82HdBzv z#SVD|Yu{;R@na>g@_&nfYs7#zV}v~~v8pJPIKl6#yA)Tz^2?D$Z6F98w5kpmEh1u_ zjRL3H%M}Z`oO~p@17A3oj#3^9tYz$5L@5}!)2Xy7%P~ + + \ No newline at end of file diff --git a/skills/databricks-unitycatalog/references/access-control.md b/skills/databricks-unitycatalog/references/access-control.md new file mode 100644 index 0000000..370c0f0 --- /dev/null +++ b/skills/databricks-unitycatalog/references/access-control.md @@ -0,0 +1,75 @@ +# Access control (grants, privileges, RLS/CLS) + +## When to use this reference + +Use this doc for: + +- Grant/revoke workflows (`GRANT`, `REVOKE`, `SHOW GRANTS`) +- “I can’t see the table” vs “I can’t query the table” debugging +- Permissions on volumes / external locations (file privileges) +- Row and column-level security (row filters, column masks) + +## Core concepts (keep straight) + +- **Privileges** are granted on UC securables (catalogs, schemas, tables/views, volumes, external locations, functions, etc.). +- **Discovery** can be separated from data access via `BROWSE`. +- **Namespace traversal** often requires `USE CATALOG` + `USE SCHEMA` even when `SELECT` exists. +- **Ownership** is not the same as `MANAGE` (workspaces differ on what each enables). +- **File privileges gotcha**: **`WRITE FILES` requires `READ FILES`**. + +## Quick checklist: “why can’t user X query object Y?” + +1. Confirm the user/principal identity (``). +2. Check grants on: + - catalog + schema (traversal / discovery) + - the target object (table/view/volume/external location) +3. If the error mentions files/paths, verify file privileges (`READ FILES`, `WRITE FILES`) and underlying external location grants. +4. If the query returns fewer rows or masked values, check row filters / column masks. 
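+ +## Common SQL patterns + +```sql +-- Inspect grants (examples) +SHOW GRANTS ON CATALOG <catalog>; +SHOW GRANTS ON SCHEMA <catalog>.<schema>; +SHOW GRANTS ON TABLE <catalog>.<schema>.<table>; +SHOW GRANTS ON VIEW <catalog>.<schema>.<view>; +SHOW GRANTS ON VOLUME <catalog>.<schema>.<volume>; + +-- Minimal traversal + discovery (lets users find objects) +GRANT USE CATALOG ON CATALOG <catalog> TO `<principal>`; +GRANT USE SCHEMA ON SCHEMA <catalog>.<schema> TO `<principal>`; +GRANT BROWSE ON CATALOG <catalog> TO `<principal>`; + +-- Data access +GRANT SELECT ON TABLE <catalog>.<schema>.<table>
 TO `<principal>`; + +-- Revoke +REVOKE SELECT ON TABLE <catalog>.<schema>.<table>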
 FROM `<principal>`; +``` + +### Troubleshooting: “not found” vs “permission denied” + +- **“Not found” / can’t list** often means missing `USE CATALOG` / `USE SCHEMA` and/or `BROWSE`. +- **“Permission denied” on query** usually means missing `SELECT`, or a denied row/column policy, or file privileges on underlying storage paths. + +## `ALL PRIVILEGES` notes + +Treat `ALL PRIVILEGES` as a convenience that depends on object type and platform semantics. Prefer granting only what is required and verifying with `SHOW GRANTS`. + +## Ownership vs `MANAGE` + +Document which operations require ownership vs `MANAGE` in your environment. Do not assume one implies the other. + +## RLS/CLS: row filters + column masks + +Unity Catalog can enforce: + +- **Row filters**: restrict which rows a principal can see +- **Column masks**: redact/transform specific columns + +Debug workflow: + +- Start with a minimal query selecting non-sensitive columns +- If results differ by principal, inspect applicable row/column policies +- Confirm base privileges first (`USE CATALOG`, `USE SCHEMA`, `SELECT`) diff --git a/skills/databricks-unitycatalog/references/ai-ml-objects.md b/skills/databricks-unitycatalog/references/ai-ml-objects.md new file mode 100644 index 0000000..b132b0b --- /dev/null +++ b/skills/databricks-unitycatalog/references/ai-ml-objects.md @@ -0,0 +1,62 @@ +# AI & ML objects in Unity Catalog (models, functions, vector, features) + +## When to use this reference + +Use this doc when working with UC-governed AI/ML primitives: + +- registered models +- UC functions (including those used as governed “tools”) +- vector search indexes +- feature tables and online store publishing (if applicable) + +## Registered models (governance mindset) + +UC can govern registered models and their lifecycle (versions, aliases/stages depending on setup). 
Treat model governance similarly to table governance: + +- who can read / write / deploy +- how changes are audited +- how environments (dev/stage/prod) are separated + +## UC functions as governed tools + +UC functions can be a controlled “tool surface” when used intentionally. + +Checklist: + +- Add a clear `COMMENT` describing safe usage and inputs/outputs. +- Ensure callers have `EXECUTE` privilege (and only what they need). +- Avoid designs that require embedding secrets in function bodies or configs. + +## Python UDFs / UDTFs (validate constraints early) + +Support, packaging, and runtime constraints vary by environment. Validate: + +- runtime compatibility +- dependency strategy (what can/can’t be packaged) +- permissions (who can create/alter/execute) + +## Vector Search indexes + +Common patterns: + +- direct index over data +- Delta Sync-managed refresh + +Pick based on freshness requirements and operational overhead. + +## Feature tables / online store publishing + +Typical workflow: + +- curate feature tables with stable keys and definitions +- publish/sync to an online store (if used) + +Confirm which feature APIs your workspace supports and which principal will run publish/sync jobs (human vs service principal). 
+ +## External access from functions/UDFs + +If functions/UDFs access external cloud services: + +- keep credentials out of code (no embedded tokens/secrets) +- confirm egress/networking policies allow access +- enforce least privilege and auditability diff --git a/skills/databricks-unitycatalog/references/lineage-and-observability.md b/skills/databricks-unitycatalog/references/lineage-and-observability.md new file mode 100644 index 0000000..20672e1 --- /dev/null +++ b/skills/databricks-unitycatalog/references/lineage-and-observability.md @@ -0,0 +1,66 @@ +# Lineage & observability (metadata, tags, audit, billing) + +## When to use this reference + +Use this doc when you need to: + +- Verify lineage exists for a table/model/dashboard/pipeline +- Bring lineage from external systems (or document gaps) +- Apply or audit tags (system vs governed) +- Investigate access and permission changes via audit logs +- Attribute costs using system billing tables + +## Automated lineage (how to reason about it) + +Unity Catalog can capture lineage across common compute and platform surfaces (tables, pipelines, dashboards, models). Coverage varies by feature/integration. + +Checklist: + +- Validate lineage on a representative object first (don’t assume global coverage). +- If lineage is missing, determine whether it’s a tooling gap, a permissions gap, or an unsupported integration path. + +## External lineage (BYO) + +For systems outside Databricks (BI tools, SaaS sources, external warehouses), use external lineage ingestion where available. If not possible, document: + +- what lineage will remain missing +- what identifiers can be used to correlate (table names, URLs, workbook IDs, etc.) + +## Tags (system vs governed) + +- **System tags**: platform-generated metadata. +- **Governed tags**: curated taxonomy with controlled assignment. 
+ +When using governed tags, principals may require privileges such as: + +- `APPLY TAG` +- an assignment permission (often called `ASSIGN`) depending on the governed-tag system in use + +## Audit logs (`system.access.audit`) + +Use audit logs to answer “who did what, when” and to diagnose unexpected permission/access patterns. + +```sql +-- Recent grant/revoke-related actions +SELECT * +FROM system.access.audit +WHERE event_time >= current_timestamp() - INTERVAL 7 DAYS + AND ( + lower(action_name) LIKE '%grant%' + OR lower(action_name) LIKE '%revoke%' + ) +ORDER BY event_time DESC +LIMIT 200; +``` + +## Billing / cost attribution (`system.billing.usage`) + +Use usage tables for cost attribution by workspace, identity, SKU, and time range. + +```sql +SELECT * +FROM system.billing.usage +WHERE usage_start_time >= current_timestamp() - INTERVAL 30 DAYS +ORDER BY usage_start_time DESC +LIMIT 200; +``` diff --git a/skills/databricks-unitycatalog/references/namespace-and-objects.md b/skills/databricks-unitycatalog/references/namespace-and-objects.md new file mode 100644 index 0000000..d17b0f7 --- /dev/null +++ b/skills/databricks-unitycatalog/references/namespace-and-objects.md @@ -0,0 +1,76 @@ +# Namespace & objects (Unity Catalog) + +## When to use this reference + +Use this doc when you need to: + +- Navigate the 3-level namespace (`catalog.schema.table`) +- Inventory catalogs/schemas/tables quickly +- Search metadata at scale via `information_schema` +- Decide between managed vs external tables, and understand view types + +## Core model: 3-level namespace + +- **Fully-qualified**: `catalog.schema.table` +- Most governance and discovery flows require the ability to traverse the namespace (often `USE CATALOG` + `USE SCHEMA`, and sometimes `BROWSE`). + +## CLI discovery (fastest for inventory) + +Always include `--profile <profile>`. 
+ +```bash +# list catalogs +databricks catalogs list --profile <profile> + +# list schemas in a catalog (⚠️ positional arg) +databricks schemas list <catalog> --profile <profile> + +# list tables in a schema (⚠️ positional args) +databricks tables list <catalog> <schema> --profile <profile> + +# inspect a table +databricks tables get <catalog>.<schema>.<table>
 --profile <profile> +``` + +### CLI gotcha: positional args + +Many UC commands use **positional** arguments (e.g. `schemas list <catalog>`). Do not invent flags like `--catalog-name` unless `--help` shows them. + +## SQL discovery via `information_schema` (best for search) + +Run on a SQL warehouse. + +```sql +-- Find tables by name pattern +SELECT table_catalog, table_schema, table_name +FROM system.information_schema.tables +WHERE lower(table_name) LIKE '%customer%'; + +-- Find columns across the lakehouse (handy for “where is field X?”) +SELECT table_catalog, table_schema, table_name, column_name, data_type +FROM system.information_schema.columns +WHERE lower(column_name) LIKE '%email%'; + +-- Inspect columns for one table +SELECT column_name, data_type, is_nullable, comment +FROM system.information_schema.columns +WHERE table_catalog = '<catalog>' + AND table_schema = '<schema>' + AND table_name = '<table>
' +ORDER BY ordinal_position; +``` + +If `system.information_schema` is unavailable, fall back to per-catalog `information_schema` (availability varies by workspace and configuration). + +## Managed vs external tables (decision guide) + +- **Managed tables**: simplest ops; UC controls the storage lifecycle. +- **External tables**: data lives in customer-controlled cloud storage; common for shared paths, multi-tool interoperability, and explicit storage ownership. + +Default to **managed** unless you have a specific requirement to own the underlying storage path and lifecycle. + +## Views, materialized views, metric views (quick mental model) + +- **Views**: computed at query time; no storage of results. +- **Materialized views**: persisted results to accelerate repeated workloads (refresh semantics vary). +- **Metric views**: newer abstraction; verify feature availability and semantics in the target workspace before relying on it. diff --git a/skills/databricks-unitycatalog/references/operations-and-migration.md b/skills/databricks-unitycatalog/references/operations-and-migration.md new file mode 100644 index 0000000..63a2e12 --- /dev/null +++ b/skills/databricks-unitycatalog/references/operations-and-migration.md @@ -0,0 +1,66 @@ +# Operations & migration (maintenance, time travel, constraints, clone) + +## When to use this reference + +Use this doc for day-2 table operations and migrations: + +- maintenance (`OPTIMIZE`, `VACUUM`, clustering) +- time travel for debugging/recovery +- migration of legacy (Hive) tables into UC +- constraints and cloning strategies + +## Maintenance operations (what to be careful about) + +Common operations: + +- `OPTIMIZE`: improves data layout / compacts files for performance. +- `VACUUM`: deletes old data files; **verify retention/compliance** before changing defaults. +- liquid clustering / auto-clustering: capability varies; confirm the workspace’s current behavior and defaults. 
+ +## Time travel (debugging + recovery) + +Be mindful of retention and any runtime-specific limitations. + +```sql +-- version-based +SELECT * +FROM <catalog>.<schema>.<table>
 VERSION AS OF 123; + +-- timestamp-based +SELECT * +FROM <catalog>.<schema>.<table>
TIMESTAMP AS OF '2026-01-01T00:00:00Z'; +``` + +Debug checklist: + +- Confirm the queried version/timestamp is within retention. +- If results differ across environments, check table properties and retention configuration. + +## Predictive optimization + +Enablement may be per-table, per-schema, or per-catalog depending on workspace defaults and policy. Verify what’s enabled before assuming optimizations will occur automatically. + +## Migrating Hive tables to Unity Catalog (SYNC workflow) + +Treat migrations as a controlled change: + +- Validate schema compatibility. +- Map permissions intentionally (don’t assume inheritance matches legacy ACLs). +- Inventory downstream dependencies (jobs, dashboards, notebooks, apps). +- Migrate a small subset first, then expand. + +## Constraints (validate support + enforcement) + +Common constraint types: + +- `NOT NULL`, `CHECK` +- `PRIMARY KEY`, `FOREIGN KEY` + +Support and enforcement semantics can vary; validate behavior in the target workspace before depending on constraint enforcement. + +## CLONE (deep vs shallow) + +- **Shallow clone**: fast; depends on source data retention and access to underlying files. +- **Deep clone**: copies data; safer isolation, higher cost. + +Pick based on isolation and retention guarantees you need. 
diff --git a/skills/databricks-unitycatalog/references/storage-and-connections.md b/skills/databricks-unitycatalog/references/storage-and-connections.md new file mode 100644 index 0000000..370048d --- /dev/null +++ b/skills/databricks-unitycatalog/references/storage-and-connections.md @@ -0,0 +1,60 @@ +# Storage & connections (credentials, external locations, federation, sharing) + +## When to use this reference + +Use this doc when you’re working with governed access to storage and external systems: + +- storage credentials (identity used to access cloud storage) +- external locations (governed mapping to cloud paths) +- federation / foreign catalogs via connections +- Delta Sharing as provider or recipient + +## Storage credentials (what they are) + +Storage credentials define **how Databricks authenticates to cloud storage** for governed access. Commonly backed by: + +- managed identity +- service principal + +Keep all examples obfuscated (no real workspace URLs, account names, or IDs). + +## External locations (the key governance primitive) + +External locations bind a cloud storage URL to a UC securable so permissions can be managed centrally. + +### Operational guidelines + +- Validate location access at creation time where possible. +- Use **read-only** locations for shared datasets. +- For write-enabled locations, explicitly grant file privileges. + +### Gotcha: file privileges + +**`WRITE FILES` requires `READ FILES`**. Always grant both when enabling writes. + +## Federation / foreign catalogs (via connections) + +Connections can expose external systems as foreign catalogs. Before building workflows: + +- confirm supported operations (read-only vs read/write) +- confirm identity and credential scope used by the connection +- confirm performance expectations and pushdown behavior + +## Iceberg REST catalog (optional integration) + +Iceberg REST catalog support (especially writes) varies by workspace and connector maturity. 
Treat as an optional integration and verify current support before committing to it. + +## Delta Sharing (provider/recipient mental model) + +Objects and workflows include: + +- providers +- shares +- recipients +- token rotation / credential hygiene + +Troubleshooting checklist: + +- Identify the role: **provider** vs **recipient** +- Identify which principal is used (human, service principal, workspace identity) +- Confirm that the recipient’s token/credentials are current and stored securely (no embedded secrets in code) diff --git a/skills/databricks-unitycatalog/references/volumes.md b/skills/databricks-unitycatalog/references/volumes.md new file mode 100644 index 0000000..7c3b172 --- /dev/null +++ b/skills/databricks-unitycatalog/references/volumes.md @@ -0,0 +1,47 @@ +# Volumes (managed vs external) + +## When to use this reference + +Use this doc when you need UC-governed file access for: + +- notebooks / jobs reading and writing files +- data ingestion/export paths governed by UC +- sharing “known-good” paths with clear permissions (instead of ad-hoc mounts) + +## Managed vs external volumes (decision guide) + +- **Managed volume**: simplest lifecycle; storage managed by Databricks/UC. +- **External volume**: backed by customer-owned cloud storage (via an external location); best when you must control the underlying path and lifecycle. + +Default to **managed** unless you have a clear reason to control the cloud path. + +## Create volumes (SQL) + +```sql +-- Managed volume +CREATE VOLUME <catalog>.<schema>.<volume>; + +-- External volume (location is a cloud path; cloud-specific scheme varies) +CREATE EXTERNAL VOLUME <catalog>.<schema>.<volume> +LOCATION '<external-location-url>/<path>/<volume-dir>'; +``` + +## Use volumes in code: canonical paths + +Prefer the `/Volumes/...` path so code is portable across notebooks/jobs: + +- `/Volumes/<catalog>/<schema>/<volume>/some/file.parquet` + +`dbutils.fs` can be used as an API surface, but the **path** should still typically be a `/Volumes/...` path. 
+ +## Permissions: the two-layer model (common failure source) + +When something fails, check both layers: + +- **Volume grants** (`READ VOLUME` / `WRITE VOLUME` on the UC object you read/write) +- **External location grants** (for external volumes) + +### Gotchas + +- **`WRITE FILES` requires `READ FILES`** (grant both). +- If users can list but not read, you may be missing file privileges or underlying external location permissions.