diff --git a/Cargo.lock b/Cargo.lock index b9570b30a5..eb2ac27ccf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -393,6 +393,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", +] + [[package]] name = "dunce" version = "1.0.5" @@ -755,12 +766,114 @@ dependencies = [ "tower-service", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "ignore" version = "0.4.25" @@ -868,6 +981,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.14" @@ -1006,6 +1125,7 @@ dependencies = [ "mdbook-preprocessor", "mdbook-renderer", "mdbook-summary", + "pulldown-cmark-to-cmark", "regex", "serde", "serde_json", @@ -1014,6 +1134,7 @@ dependencies = [ "toml", "topological-sort", "tracing", + "url", ] [[package]] @@ -1416,6 +1537,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1822,6 +1952,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "string_cache" version = "0.8.9" @@ -1906,6 +2042,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", +] + [[package]] name = "tempfile" version = "3.24.0" @@ -1969,6 +2116,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" version = "1.48.0" @@ -2228,6 +2385,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + [[package]] name = "utf-8" version = "0.7.6" @@ -2240,6 +2409,12 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2405,6 +2580,12 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + [[package]] name = "xml5ever" version = "0.17.0" @@ -2420,6 +2601,29 @@ dependencies = [ name = "xtask" version = "0.0.0" +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.31" @@ -2440,6 +2644,60 @@ dependencies = [ "syn 2.0.112", ] +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", +] + [[package]] name = "zmij" version = "1.0.6" diff --git a/Cargo.toml b/Cargo.toml index 9643917443..a4673061a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ notify-debouncer-mini = "0.7.0" opener = "0.8.3" pathdiff = "0.2.3" pulldown-cmark = { version = "0.13.0", default-features = false, features = ["html"] } # Do not update, part of the public api. +pulldown-cmark-to-cmark = "22.0.0" regex = "1.12.2" select = "0.6.1" semver = "1.0.27" @@ -67,6 +68,7 @@ topological-sort = "0.2.2" tower-http = "0.6.8" tracing = "0.1.44" tracing-subscriber = { version = "0.3.22", features = ["env-filter"] } +url = "2.5.8" walkdir = "2.5.0" [package] diff --git a/crates/mdbook-driver/Cargo.toml b/crates/mdbook-driver/Cargo.toml index 92bc19f96c..aff44c4548 100644 --- a/crates/mdbook-driver/Cargo.toml +++ b/crates/mdbook-driver/Cargo.toml @@ -16,6 +16,7 @@ mdbook-markdown.workspace = true mdbook-preprocessor.workspace = true mdbook-renderer.workspace = true mdbook-summary.workspace = true +pulldown-cmark-to-cmark.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true @@ -24,6 +25,7 @@ tempfile.workspace = true toml.workspace = true topological-sort.workspace = true tracing.workspace = true +url.workspace = true [lints] workspace = true diff --git a/crates/mdbook-driver/src/builtin_preprocessors/index.rs b/crates/mdbook-driver/src/builtin_preprocessors/index.rs index 7d9d22ce97..75041931a9 100644 --- a/crates/mdbook-driver/src/builtin_preprocessors/index.rs +++ b/crates/mdbook-driver/src/builtin_preprocessors/index.rs @@ -1,9 +1,12 @@ use anyhow::Result; use mdbook_core::book::{Book, BookItem}; use mdbook_core::static_regex; +use mdbook_markdown::pulldown_cmark::{Event, LinkType, Tag}; +use mdbook_markdown::{MarkdownOptions, new_cmark_parser}; use mdbook_preprocessor::{Preprocessor, PreprocessorContext}; -use std::path::Path; +use std::path::{Path, PathBuf}; use tracing::warn; +use url::{ParseError, Url}; /// A preprocessor for converting file name `README.md` to `index.md` since /// `README.md` is the de facto index file in markdown-based documentation. @@ -30,6 +33,7 @@ impl Preprocessor for IndexPreprocessor { let source_dir = ctx.root.join(&ctx.config.book.src); book.for_each_mut(|section: &mut BookItem| { if let BookItem::Chapter(ref mut ch) = *section { + // Rename README files to `index.md` if let Some(ref mut path) = ch.path { if is_readme_file(&path) { let mut index_md = source_dir.join(path.with_file_name("index.md")); @@ -40,6 +44,9 @@ impl Preprocessor for IndexPreprocessor { path.set_file_name("index.md"); } } + // Fix inline links to README files + let markdown_options = get_markdown_options(ctx); + ch.content = fix_readme_links(&ch.content, &markdown_options).unwrap_or_default(); } }); @@ -78,6 +85,52 @@ fn is_readme_file>(path: P) -> bool { ) } +fn get_markdown_options(ctx: &PreprocessorContext) -> MarkdownOptions { + let html_config = ctx.config.html_config().unwrap_or_default(); + let mut markdown_options = MarkdownOptions::default(); + markdown_options.smart_punctuation = html_config.smart_punctuation; + markdown_options.definition_lists = html_config.definition_lists; + markdown_options.admonitions = html_config.admonitions; + markdown_options +} + +fn fix_readme_links(content: &str, markdown_options: &MarkdownOptions) -> Result { + let mut buf = String::with_capacity(content.len()); + + let events = new_cmark_parser(content, markdown_options).map(|e| match e { + Event::Start(Tag::Link { + link_type: + link_type @ (LinkType::Inline + | LinkType::Reference + | LinkType::Collapsed + | LinkType::Shortcut), + dest_url, + title, + id, + }) if matches!( + Url::parse(&dest_url), + Err(ParseError::RelativeUrlWithoutBase) + ) => + { + Event::Start(Tag::Link { + link_type: link_type, + dest_url: { + let mut path = PathBuf::from(dest_url.as_ref()); + if is_readme_file(&path) { + path.set_file_name("index.md"); + } + path.to_string_lossy().into_owned().into() + }, + title, + id, + }) + } + e => e, + }); + + Ok(pulldown_cmark_to_cmark::cmark(events, &mut buf).map(|_| buf)?) +} + #[cfg(test)] mod tests { use super::*; @@ -102,4 +155,105 @@ mod tests { let path = "path/to/README-README.md"; assert!(!is_readme_file(path)); } + + #[test] + #[cfg(target_os = "windows")] + fn internal_readme_links_are_converted_to_index() { + let opts = MarkdownOptions::default(); + + let md = "[inline](path/to/README.md)"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[inline](path/to\\index.md)" + ); + + let md = "[reference][link]\n\n[link]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[reference][link]\n\n[link]: path/to\\index.md" + ); + + let md = "[collapsed][]\n\n[collapsed]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[collapsed][]\n\n[collapsed]: path/to\\index.md" + ); + + let md = "[shortcut]\n\n[shortcut]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[shortcut]\n\n[shortcut]: path/to\\index.md" + ); + } + + #[test] + #[cfg(not(target_os = "windows"))] + fn internal_readme_links_are_converted_to_index() { + let opts = MarkdownOptions::default(); + + let md = "[inline](path/to/README.md)"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[inline](path/to/index.md)" + ); + + let md = "[reference][link]\n\n[link]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[reference][link]\n\n[link]: path/to/index.md" + ); + + let md = "[collapsed][]\n\n[collapsed]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[collapsed][]\n\n[collapsed]: path/to/index.md" + ); + + let md = "[shortcut]\n\n[shortcut]: path/to/README.md"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "[shortcut]\n\n[shortcut]: path/to/index.md" + ); + } + + #[test] + fn other_links_are_not_converted_to_index() { + let opts = MarkdownOptions::default(); + + let md = "[inline](https://example.com)"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = "[inline]()"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = "[reference][link]\n\n[link]: https://example.com"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = "[reference][unknown]"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "\\[reference\\]\\[unknown\\]" + ); + + let md = "[collapsed][]\n\n[collapsed]: https://example.com"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = "[collapsed][]"; + assert_eq!( + fix_readme_links(md, &opts).unwrap(), + "\\[collapsed\\]\\[\\]" + ); + + let md = "[shortcut]\n\n[shortcut]: https://example.com"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = "[shortcut]"; + assert_eq!(fix_readme_links(md, &opts).unwrap(), "\\[shortcut\\]"); + + let md = ""; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + + let md = ""; + assert_eq!(fix_readme_links(md, &opts).unwrap(), md); + } } diff --git a/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml b/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml index d98f7f45c4..79956c16fd 100644 --- a/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml +++ b/examples/remove-emphasis/mdbook-remove-emphasis/Cargo.toml @@ -7,7 +7,7 @@ publish = false [dependencies] mdbook-preprocessor.workspace = true pulldown-cmark = { workspace = true, default-features = false } -pulldown-cmark-to-cmark = "22.0.0" +pulldown-cmark-to-cmark.workspace = true serde_json.workspace = true [[bin]]