diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 99067675b8251..f36f1164e5986 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -211,7 +211,7 @@ runs: EOF - name: Install protoc - if: ${{ inputs.protoc == 'true' }} + if: ${{ inputs.protoc == 'true' || env.VDEV_NEEDS_COMPILE == 'true' }} shell: bash run: | echo "Installing protoc" diff --git a/.github/workflows/changes.yml b/.github/workflows/changes.yml index 256b74969cab6..a057df36dc312 100644 --- a/.github/workflows/changes.yml +++ b/.github/workflows/changes.yml @@ -222,7 +222,8 @@ jobs: component_docs: - 'scripts/generate-component-docs.rb' - "vdev/**" - - 'website/cue/**/base/**.cue' + - 'website/cue/**/generated/**.cue' + - 'website/cue/**/functions/*.cue' - ".github/workflows/changes.yml" markdown: - '**/**.md' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 114321ae28242..a0437acecace8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -146,8 +146,8 @@ jobs: markdownlint: true - run: make check-markdown - check-component-docs: - name: Check Component Docs + check-generated-docs: + name: Check Generated Docs runs-on: ubuntu-24.04-8core if: ${{ needs.changes.outputs.source == 'true' || needs.changes.outputs.component_docs == 'true' || needs.changes.outputs.test-yml == 'true' }} needs: changes @@ -159,7 +159,7 @@ jobs: protoc: true cue: true libsasl2: true - - run: make check-component-docs + - run: make check-generated-docs check-rust-docs: name: Check Rust Docs @@ -217,7 +217,7 @@ jobs: - check-licenses - check-docs - check-markdown - - check-component-docs + - check-generated-docs - check-rust-docs - test-vrl - build-vrl-playground diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93868085dc574..a9bb9d1f6758a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -141,7 +141,7 @@ echo "Running pre-push checks..." make check-licenses make check-fmt make check-clippy -make check-component-docs +make check-generated-docs # Some other checks that in our experience rarely fail on PRs. make check-deny @@ -292,7 +292,7 @@ cargo vdev check events cargo vdev check licenses # Vector's documentation for each component is generated from the comments attached to the Component structs and members. # Running this ensures that the generated docs are up to date. -make check-component-docs +make check-generated-docs # Generate the code documentation for the Vector project. # Run this to ensure the docs can be generated without errors (warnings are acceptable at the minute). cd rust-doc && make docs diff --git a/Cargo.lock b/Cargo.lock index c7bbd26317a32..0560958b51893 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12475,6 +12475,8 @@ dependencies = [ "tempfile", "toml 0.9.8", "toml_edit 0.22.27", + "vector-vrl-functions", + "vrl", ] [[package]] @@ -13107,7 +13109,7 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "vrl" version = "0.30.0" -source = "git+https://github.com/vectordotdev/vrl.git?branch=main#bacda767c055ff37865ec96fe168b31b344252d1" +source = "git+https://github.com/vectordotdev/vrl.git?branch=main#c4afddc9d18cc368796686c312cd87d90d3c8808" dependencies = [ "aes", "aes-siv", diff --git a/Makefile b/Makefile index 1b63cbe79519a..d747656e1c29b 100644 --- a/Makefile +++ b/Makefile @@ -472,7 +472,7 @@ check: ## Run prerequisite code checks check-all: ## Check everything check-all: check-fmt check-clippy check-docs check-all: check-examples check-component-features -check-all: check-scripts check-deny check-component-docs check-licenses +check-all: check-scripts check-deny check-generated-docs check-licenses .PHONY: check-component-features check-component-features: ## Check that all component features are setup properly @@ -514,9 +514,9 @@ check-deny: ## Check advisories licenses and sources for crate dependencies check-events: ## Check that events satisfy patterns set in https://github.com/vectordotdev/vector/blob/master/rfcs/2020-03-17-2064-event-driven-observability.md ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check events -.PHONY: check-component-docs -check-component-docs: generate-component-docs ## Checks that the machine-generated component Cue docs are up-to-date. - ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check component-docs +.PHONY: check-generated-docs +check-generated-docs: generate-docs ## Checks that the machine-generated component Cue docs are up-to-date. + ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) check generated-docs ##@ Rustdoc build-rustdoc: ## Build Vector's Rustdocs @@ -693,6 +693,14 @@ generate-component-docs: ## Generate per-component Cue docs from the configurati ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) build component-docs /tmp/vector-config-schema.json \ $(if $(findstring true,$(CI)),>/dev/null,) +.PHONY: generate-vrl-docs +generate-vrl-docs: ## Generate VRL function documentation from Rust source. + ${MAYBE_ENVIRONMENT_EXEC} $(VDEV) build vrl-docs \ + $(if $(findstring true,$(CI)),>/dev/null,) + +.PHONY: generate-docs +generate-docs: generate-component-docs generate-vrl-docs + .PHONY: signoff signoff: ## Signsoff all previous commits since branch creation scripts/signoff.sh diff --git a/docs/DEVELOPING.md b/docs/DEVELOPING.md index f32965244c28f..628803963dabb 100644 --- a/docs/DEVELOPING.md +++ b/docs/DEVELOPING.md @@ -127,7 +127,7 @@ Loosely, you'll need the following: - **To run integration tests:** Have `docker` available, or a real live version of that service. (Use `AUTOSPAWN=false`) - **To run `make check-component-features`:** Have `remarshal` installed. - **To run `make check-licenses` or `make build-licenses`:** Have `dd-rust-license-tool` [installed](https://github.com/DataDog/rust-license-tool). -- **To run `make generate-component-docs`:** Have `cue` [installed](https://cuelang.org/docs/install/). +- **To run `make generate-docs`:** Have `cue` [installed](https://cuelang.org/docs/install/). If you find yourself needing to run something inside the Docker environment described above, that's totally fine, they won't collide or hurt each other. In this case, you'd just run `make environment-generate`. @@ -161,8 +161,8 @@ cargo bench transforms::example # Format your code before pushing! make fmt cargo fmt -# Build component documentation for the website -make generate-component-docs +# Build component and VRL documentation for the website +make generate-docs ``` If you run `make` you'll see a full list of all our tasks. Some of these will start Docker containers, sign commits, or even make releases. These are not common development commands and your mileage may vary. diff --git a/docs/DOCUMENTING.md b/docs/DOCUMENTING.md index a0529de37a8a9..ccbcfbc0f0a45 100644 --- a/docs/DOCUMENTING.md +++ b/docs/DOCUMENTING.md @@ -61,7 +61,7 @@ Much of Vector's reference documentation is automatically compiled from source c To regenerate this content, run: ```bash -make generate-component-docs +make generate-docs ``` ### Formatting diff --git a/vdev/Cargo.toml b/vdev/Cargo.toml index e629929fbd080..0b06c55bb65cf 100644 --- a/vdev/Cargo.toml +++ b/vdev/Cargo.toml @@ -45,6 +45,9 @@ semver.workspace = true indoc.workspace = true git2 = { version = "0.20.4" } cfg-if.workspace = true +vector-vrl-functions = { path = "../lib/vector-vrl/functions", features = ["dnstap", "vrl-metrics"] } +# Only here for docs generation. Using vrl with this feature enabled will be severely broken +vrl = { workspace = true, features = ["__mock_return_values_for_tests"] } [package.metadata.binstall] pkg-url = "{ repo }/releases/download/vdev-v{ version }/{ name }-{ target }-v{ version }.tgz" diff --git a/vdev/src/commands/build/mod.rs b/vdev/src/commands/build/mod.rs index 48890c8682b83..f46d8a8ae210c 100644 --- a/vdev/src/commands/build/mod.rs +++ b/vdev/src/commands/build/mod.rs @@ -1,6 +1,7 @@ mod licenses; mod publish_metadata; mod vector; +mod vrl_docs; mod vrl_wasm; crate::cli_subcommands! { @@ -11,6 +12,7 @@ crate::cli_subcommands! { publish_metadata, release_cue, vector, + vrl_docs, vrl_wasm, } diff --git a/vdev/src/commands/build/vrl_docs.rs b/vdev/src/commands/build/vrl_docs.rs new file mode 100644 index 0000000000000..1ec39459ae98c --- /dev/null +++ b/vdev/src/commands/build/vrl_docs.rs @@ -0,0 +1,247 @@ +use anyhow::Result; +use serde::Serialize; +use std::{collections::HashMap, fs, path::PathBuf}; +use vrl::compiler::Function; +use vrl::compiler::value::kind; +use vrl::core::Value; +use vrl::prelude::Parameter; +use vrl::prelude::function::EnumVariant; + +/// Generate VRL function documentation as JSON files. +/// +/// This command iterates over all VRL functions available in Vector and generates +/// JSON documentation files that are compatible with the CUE-based documentation +/// pipeline (valid JSON is valid CUE). +#[derive(clap::Args, Debug)] +#[command()] +pub struct Cli { + /// Output directory for generated documentation files + #[arg(long, default_value = "website/cue/reference/remap/functions")] + output_dir: PathBuf, +} + +#[derive(Serialize)] +struct FunctionDocWrapper { + remap: RemapWrapper, +} + +#[derive(Serialize)] +struct RemapWrapper { + functions: std::collections::HashMap, +} + +#[derive(Serialize)] +struct FunctionDoc { + anchor: String, + name: String, + category: String, + description: String, + arguments: Vec, + r#return: ReturnDoc, + #[serde(skip_serializing_if = "Vec::is_empty")] + internal_failure_reasons: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + examples: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + notices: Vec, + pure: bool, +} + +#[derive(Serialize)] +struct ArgumentDoc { + name: String, + description: String, + required: bool, + r#type: Vec, + #[serde(skip_serializing_if = "HashMap::is_empty")] + r#enum: HashMap, + #[serde(skip_serializing_if = "Option::is_none")] + default: Option, +} + +#[derive(Serialize)] +struct ReturnDoc { + types: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + rules: Vec, +} + +#[derive(Serialize)] +struct ExampleDoc { + title: String, + source: String, + #[serde(skip_serializing_if = "Option::is_none")] + r#return: Option, + #[serde(skip_serializing_if = "Option::is_none")] + raises: Option, +} + +impl Cli { + pub fn exec(self) -> Result<()> { + let functions = vector_vrl_functions::all(); + + // Ensure output directory exists + fs::create_dir_all(&self.output_dir)?; + + for func in functions { + let doc = build_function_doc(func.as_ref()); + let filename = format!("{}.cue", doc.name); + let filepath = self.output_dir.join(&filename); + + // Wrap in the expected CUE structure + let mut functions_map = std::collections::HashMap::new(); + functions_map.insert(doc.name.clone(), doc); + let wrapper = FunctionDocWrapper { + remap: RemapWrapper { + functions: functions_map, + }, + }; + + let json = serde_json::to_string_pretty(&wrapper)?; + fs::write(&filepath, json)?; + + println!("Generated: {}", filepath.display()); + } + + println!("\nVRL documentation generation complete."); + Ok(()) + } +} + +fn build_function_doc(func: &dyn Function) -> FunctionDoc { + let name = func.identifier().to_string(); + + let arguments: Vec = func + .parameters() + .iter() + .map(|param| { + let Parameter { + keyword, + kind, + required, + description, + default, + enum_variants, + } = param; + + let name = keyword.trim().to_string(); + let description = description.trim().to_string(); + let default = default.map(pretty_value); + let r#type = kind_to_types(*kind); + let r#enum = enum_variants + .unwrap_or_default() + .iter() + .map(|EnumVariant { value, description }| { + (value.to_string(), description.to_string()) + }) + .collect(); + + ArgumentDoc { + name, + description, + required: *required, + r#type, + default, + r#enum, + } + }) + .collect(); + + let examples: Vec = func + .examples() + .iter() + .map(|example| { + let (r#return, raises) = match &example.result { + Ok(result) => { + // Try to parse as JSON, otherwise treat as string + let value = serde_json::from_str(result) + .unwrap_or_else(|_| serde_json::Value::String(result.to_string())); + (Some(value), None) + } + Err(error) => (None, Some(error.to_string())), + }; + + let source = example.source.to_string(); + let title = example.title.to_string(); + ExampleDoc { + title, + source, + r#return, + raises, + } + }) + .collect(); + + FunctionDoc { + anchor: name.clone(), + name, + category: func.category().to_string(), + description: trim_str(func.usage()), + arguments, + r#return: ReturnDoc { + types: kind_to_types(func.return_kind()), + rules: trim_slice(func.return_rules()), + }, + internal_failure_reasons: trim_slice(func.internal_failure_reasons()), + examples, + notices: trim_slice(func.notices()), + pure: func.pure(), + } +} + +fn kind_to_types(kind_bits: u16) -> Vec { + // All type bits combined + if (kind_bits & kind::ANY) == kind::ANY { + return vec!["any".to_string()]; + } + + let mut types = Vec::new(); + + if (kind_bits & kind::BYTES) == kind::BYTES { + types.push("string".to_string()); + } + if (kind_bits & kind::INTEGER) == kind::INTEGER { + types.push("integer".to_string()); + } + if (kind_bits & kind::FLOAT) == kind::FLOAT { + types.push("float".to_string()); + } + if (kind_bits & kind::BOOLEAN) == kind::BOOLEAN { + types.push("boolean".to_string()); + } + if (kind_bits & kind::OBJECT) == kind::OBJECT { + types.push("object".to_string()); + } + if (kind_bits & kind::ARRAY) == kind::ARRAY { + types.push("array".to_string()); + } + if (kind_bits & kind::TIMESTAMP) == kind::TIMESTAMP { + types.push("timestamp".to_string()); + } + if (kind_bits & kind::REGEX) == kind::REGEX { + types.push("regex".to_string()); + } + if (kind_bits & kind::NULL) == kind::NULL { + types.push("null".to_string()); + } + + assert!(!types.is_empty(), "kind_bits {kind_bits} produced no types"); + + types +} + +fn pretty_value(v: &Value) -> String { + if let Value::Bytes(b) = v { + str::from_utf8(b).map_or_else(|_| v.to_string(), String::from) + } else { + v.to_string() + } +} + +fn trim_str(s: &'static str) -> String { + s.trim().to_string() +} + +fn trim_slice(slice: &'static [&'static str]) -> Vec { + slice.iter().map(|s| s.trim().to_string()).collect() +} diff --git a/vdev/src/commands/check/component_docs.rs b/vdev/src/commands/check/generated_docs.rs similarity index 76% rename from vdev/src/commands/check/component_docs.rs rename to vdev/src/commands/check/generated_docs.rs index 85a8ab280b1d8..62406951e3cde 100644 --- a/vdev/src/commands/check/component_docs.rs +++ b/vdev/src/commands/check/generated_docs.rs @@ -12,7 +12,10 @@ impl Cli { let dirty_component_files: Vec = files .into_iter() .filter(|file| file.starts_with("website/cue/reference")) - .filter(|file| file.contains("generated/")) + .filter(|file| { + file.contains("generated/") + || file.starts_with("website/cue/reference/remap/functions/") + }) .collect(); // If it is not empty, there are out-of-sync component Cue files in the current branch. @@ -22,7 +25,7 @@ impl Cli { println!(" - {file}"); } println!( - "Run `make generate-component-docs` locally to update your branch and commit/push the changes." + "Run `make generate-docs` locally to update your branch and commit/push the changes." ); std::process::exit(1); } diff --git a/vdev/src/commands/check/mod.rs b/vdev/src/commands/check/mod.rs index 4d61a76dbf3c2..44a2242cc35b3 100644 --- a/vdev/src/commands/check/mod.rs +++ b/vdev/src/commands/check/mod.rs @@ -1,8 +1,8 @@ -mod component_docs; mod component_features; mod deny; mod examples; mod fmt; +mod generated_docs; mod licenses; mod markdown; mod rust; @@ -10,7 +10,7 @@ mod scripts; crate::cli_subcommands! { "Check parts of the Vector code base..." - component_docs, + generated_docs, component_features, deny, docs, diff --git a/website/layouts/partials/data.html b/website/layouts/partials/data.html index 7243270d28d40..b486b1233d560 100644 --- a/website/layouts/partials/data.html +++ b/website/layouts/partials/data.html @@ -1800,6 +1800,16 @@

{{ end }} + + {{ if isset .ctx "raises" }} +
+ Raises + +
+ {{ template "code" .ctx.raises }} +
+
+ {{ end }} {{ end }}