From ad4e59d301489137cfa3302174dc043deee8b2ee Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Thu, 9 Apr 2026 16:38:57 +0200 Subject: [PATCH] feat(models): add LineNumber newtype to replace raw usize for start_line/end_line Introduce LineNumber (backed by NonZeroUsize) and LineSpan newtypes to replace raw usize fields for start_line/end_line across all output, detection, and internal types. This eliminates the possibility of invalid zero-valued line numbers at the type level, removes runtime guards that checked for start_line > 0, and makes the 1-indexed semantics explicit in the type system. Key changes: - New src/models/line_number.rs with LineNumber(NonZeroUsize) and LineSpan { start, end } types - All output structs (Match, Copyright, Holder, Author, OutputEmail, OutputURL) now use LineNumber for start_line/end_line - All internal detection types (CopyrightDetection, HolderDetection, AuthorDetection, Token, EmailDetection, UrlDetection, LicenseMatch, FileRegion, DeclaredLicenseMatchMetadata) migrated - SerializableLicenseMatch preserves JSON backward compatibility via serde(transparent) on LineNumber - Removed now-unnecessary start_line > 0 / end_line > 0 guards - line_for_pos() returns Option<LineNumber> instead of Option<usize> - matched_text_from_text() takes (LineNumber, LineNumber) params - ranges_overlap() takes LineNumber params (Ord-based comparison) - LineSpan defined but not yet adopted (for future use) --- src/copyright/credits.rs | 9 +- src/copyright/detector.rs | 1303 +++++++++-------- src/copyright/detector_author_heuristics.rs | 124 +- src/copyright/detector_test.rs | 128 +- src/copyright/lexer.rs | 9 +- src/copyright/lexer_test.rs | 7 +- src/copyright/line_tracking.rs | 10 +- src/copyright/parser.rs | 5 +- src/copyright/parser_test.rs | 3 +- src/copyright/types.rs | 42 +- src/finder/emails.rs | 8 +- src/finder/golden_test.rs | 8 +- src/finder/mod.rs | 3 +- src/finder/urls.rs | 8 +- src/license_detection/aho_match.rs | 12 +- 
src/license_detection/detection/analysis.rs | 36 +- src/license_detection/detection/grouping.rs | 15 +- src/license_detection/detection/identifier.rs | 5 +- src/license_detection/detection/mod.rs | 80 +- src/license_detection/detection/types.rs | 12 +- src/license_detection/hash_match.rs | 11 +- .../match_refine/false_positive.rs | 5 +- .../match_refine/filter_low_quality.rs | 30 +- .../match_refine/handle_overlaps.rs | 47 +- src/license_detection/match_refine/merge.rs | 41 +- src/license_detection/match_refine/mod.rs | 13 +- src/license_detection/models/license_match.rs | 13 +- src/license_detection/models/mod_tests.rs | 63 +- src/license_detection/seq_match/matching.rs | 11 +- src/license_detection/seq_match/mod.rs | 20 +- src/license_detection/spdx_lid/mod.rs | 7 +- src/license_detection/spdx_lid/test.rs | 9 +- src/license_detection/tests.rs | 3 +- src/license_detection/unknown_match.rs | 35 +- src/main_test.rs | 25 +- src/models/file_info.rs | 33 +- src/models/line_number.rs | 72 + src/models/mod.rs | 2 + src/output/debian.rs | 10 +- src/output/html.rs | 24 +- src/output/mod.rs | 40 +- src/output/spdx.rs | 6 +- src/parsers/debian.rs | 28 +- src/parsers/license_normalization.rs | 25 +- src/post_processing/classify_test.rs | 16 +- src/post_processing/mod.rs | 5 +- src/post_processing/output_test.rs | 232 +-- src/post_processing/reference_following.rs | 16 +- src/post_processing/summary/test.rs | 195 +-- src/post_processing/tallies_test.rs | 124 +- src/scan_result_shaping/core_test.rs | 148 +- src/scan_result_shaping/json_input_test.rs | 9 +- src/scan_result_shaping/mod.rs | 14 +- src/scanner/mod.rs | 10 +- src/scanner/process.rs | 39 +- tests/output_format_golden.rs | 20 +- tests/scanner_copyright_credits.rs | 4 +- tests/scanner_integration.rs | 18 +- 58 files changed, 1783 insertions(+), 1467 deletions(-) create mode 100644 src/models/line_number.rs diff --git a/src/copyright/credits.rs b/src/copyright/credits.rs index fab9b7242..8ab486a87 100644 --- 
a/src/copyright/credits.rs +++ b/src/copyright/credits.rs @@ -11,6 +11,7 @@ use std::path::Path; use super::types::AuthorDetection; +use crate::models::LineNumber; /// Filenames recognized as CREDITS/AUTHORS files (case-insensitive) const CREDITS_FILENAMES: &[&str] = &[ @@ -118,8 +119,8 @@ fn process_credit_group(group: &[(usize, &str)]) -> Option { Some(AuthorDetection { author, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("invalid line number"), + end_line: LineNumber::new(end_line).expect("invalid line number"), }) } @@ -154,8 +155,8 @@ W: http://www.randombit.net/ authors[0].author, "Jack Lloyd lloyd@randombit.net http://www.randombit.net/" ); - assert_eq!(authors[0].start_line, 1); - assert_eq!(authors[0].end_line, 3); + assert_eq!(authors[0].start_line, LineNumber::ONE); + assert_eq!(authors[0].end_line, LineNumber::new(3).unwrap()); } #[test] diff --git a/src/copyright/detector.rs b/src/copyright/detector.rs index dc3c40e8c..0b7c4e863 100644 --- a/src/copyright/detector.rs +++ b/src/copyright/detector.rs @@ -32,6 +32,7 @@ use super::refiner::{ use super::types::{ AuthorDetection, CopyrightDetection, HolderDetection, ParseNode, PosTag, Token, TreeLabel, }; +use crate::models::LineNumber; const NON_COPYRIGHT_LABELS: &[TreeLabel] = &[]; const NON_HOLDER_LABELS: &[TreeLabel] = &[TreeLabel::YrRange, TreeLabel::YrAnd]; @@ -166,7 +167,7 @@ pub fn detect_copyrights_from_text_with_deadline( if let Some(det) = extract_original_author_additional_contributors(&tree) && !authors .iter() - .any(|a| a.author == det.author && a.start_line == det.start_line) + .any(|a| a.author == det.author && a.start_line.get() == det.start_line.get()) { authors.push(det); } @@ -188,12 +189,12 @@ pub fn detect_copyrights_from_text_with_deadline( let mut ok = true; for t in tree.iter().flat_map(collect_all_leaves) { if let Some(existing) = line { - if existing != t.start_line { + if existing != t.start_line.get() { ok = false; break; } } else { - line = 
Some(t.start_line); + line = Some(t.start_line.get()); } } ok @@ -239,7 +240,7 @@ pub fn detect_copyrights_from_text_with_deadline( if let Some(det) = extract_original_author_additional_contributors(&tree) && !authors .iter() - .any(|a| a.author == det.author && a.start_line == det.start_line) + .any(|a| a.author == det.author && a.start_line.get() == det.start_line.get()) { authors.push(det); } @@ -324,10 +325,6 @@ pub fn detect_copyrights_from_text_with_deadline( dedupe_exact_span_holders(&mut holders); dedupe_exact_span_authors(&mut authors); - copyrights.retain(|c| c.start_line > 0 && c.end_line > 0); - holders.retain(|h| h.start_line > 0 && h.end_line > 0); - authors.retain(|a| a.start_line > 0 && a.end_line > 0); - (copyrights, holders, authors) } @@ -387,10 +384,11 @@ fn add_missing_holders_for_bare_c_name_year_suffixes( if holder.is_empty() { continue; } - if holders - .iter() - .any(|h| h.start_line == c.start_line && h.end_line == c.end_line && h.holder == holder) - { + if holders.iter().any(|h| { + h.start_line.get() == c.start_line.get() + && h.end_line.get() == c.end_line.get() + && h.holder == holder + }) { continue; } holders.push(HolderDetection { @@ -525,7 +523,7 @@ fn drop_shadowed_year_only_copyright_prefixes_same_start_line( std::collections::HashMap::new(); for c in copyrights.iter() { by_start - .entry(c.start_line) + .entry(c.start_line.get()) .or_default() .push(normalize_whitespace(&c.copyright)); } @@ -535,7 +533,7 @@ fn drop_shadowed_year_only_copyright_prefixes_same_start_line( if !YEAR_ONLY_RE.is_match(short.as_str()) { return true; } - let Some(all) = by_start.get(&c.start_line) else { + let Some(all) = by_start.get(&c.start_line.get()) else { return true; }; !all.iter() @@ -559,7 +557,7 @@ fn drop_year_only_copyrights_shadowed_by_previous_software_copyright_line( }); copyrights.retain(|c| { - if c.start_line <= 1 { + if c.start_line <= LineNumber::ONE { return true; } let Some(cap) = YEAR_ONLY_RE.captures(c.copyright.trim()) else { @@ 
-570,12 +568,12 @@ fn drop_year_only_copyrights_shadowed_by_previous_software_copyright_line( return true; } - let this_raw = raw_lines.get(c.start_line - 1).copied().unwrap_or(""); + let this_raw = raw_lines.get(c.start_line.get() - 1).copied().unwrap_or(""); if this_raw.to_ascii_lowercase().contains("software copyright") { return false; } - let prev_prepared = prepared_cache.get(c.start_line - 1).unwrap_or(""); + let prev_prepared = prepared_cache.get(c.start_line.get() - 1).unwrap_or(""); if let Some(prev) = PREV_SOFTWARE_RE.captures(prev_prepared) { let y2 = prev.name("year").map(|m| m.as_str()).unwrap_or(""); return y2 != year; @@ -658,12 +656,12 @@ fn merge_multiline_person_year_copyright_continuations( if !copyrights .iter() - .any(|c| c.start_line == ln1 && c.end_line == ln2 && c.copyright == refined) + .any(|c| c.start_line.get() == ln1 && c.end_line.get() == ln2 && c.copyright == refined) { copyrights.push(CopyrightDetection { copyright: refined.clone(), - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } @@ -671,12 +669,12 @@ fn merge_multiline_person_year_copyright_continuations( if let Some(h) = refine_holder_in_copyright_context(&raw_holder) && !holders .iter() - .any(|x| x.start_line == ln1 && x.end_line == ln2 && x.holder == h) + .any(|x| x.start_line.get() == ln1 && x.end_line.get() == ln2 && x.holder == h) { holders.push(HolderDetection { holder: h, - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } } @@ -692,7 +690,7 @@ fn add_embedded_copyright_clause_variants(copyrights: &mut Vec = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut to_add = Vec::new(); @@ -717,7 +715,7 @@ fn add_embedded_copyright_clause_variants(copyrights: &mut Vec = 
copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let existing_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); let mut new_c = Vec::new(); @@ -760,7 +758,7 @@ fn add_found_at_short_variants( continue; } let short = format!("(c) by {name}"); - let key = (c.start_line, c.end_line, short.clone()); + let key = (c.start_line.get(), c.end_line.get(), short.clone()); if !existing_c.contains(&key) { new_c.push(CopyrightDetection { copyright: short, @@ -770,7 +768,7 @@ fn add_found_at_short_variants( } let holder_short = name.to_string(); - let hkey = (c.start_line, c.end_line, holder_short.clone()); + let hkey = (c.start_line.get(), c.end_line.get(), holder_short.clone()); if !existing_h.contains(&hkey) { new_h.push(HolderDetection { holder: holder_short, @@ -829,7 +827,7 @@ fn drop_shadowed_linux_foundation_holder_copyrights_same_line( if let Some(cap) = WITH_C_RE.captures(c.copyright.trim()) { let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim(); if !years.is_empty() { - years_by_line.insert((c.start_line, years.to_string())); + years_by_line.insert((c.start_line.get(), years.to_string())); } } } @@ -842,7 +840,7 @@ fn drop_shadowed_linux_foundation_holder_copyrights_same_line( if years.is_empty() { return true; } - !years_by_line.contains(&(c.start_line, years.to_string())) + !years_by_line.contains(&(c.start_line.get(), years.to_string())) }); } @@ -875,19 +873,21 @@ fn restore_linux_foundation_copyrights_from_raw_lines( let full = normalize_whitespace(&format!("Copyright (c) {years} Linux Foundation")); if copyrights .iter() - .any(|c| c.start_line == ln && c.end_line == ln && c.copyright == full) + .any(|c| c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == full) { continue; } 
to_add.push(CopyrightDetection { copyright: full.clone(), - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); let bare = normalize_whitespace(&format!("Copyright (c) {years}")); - copyrights.retain(|c| !(c.start_line == ln && c.end_line == ln && c.copyright == bare)); + copyrights.retain(|c| { + !(c.start_line.get() == ln && c.end_line.get() == ln && c.copyright == bare) + }); } copyrights.extend(to_add); @@ -906,15 +906,15 @@ fn add_bare_email_variants_for_escaped_angle_lines( let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut to_add = Vec::new(); for c in copyrights.iter() { - if c.start_line == 0 || c.end_line == 0 || c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { continue; } - let Some(raw) = raw_lines.get(c.start_line - 1) else { + let Some(raw) = raw_lines.get(c.start_line.get() - 1) else { continue; }; let raw_lower = raw.to_ascii_lowercase(); @@ -930,7 +930,7 @@ fn add_bare_email_variants_for_escaped_angle_lines( let Some(refined) = refine_copyright(&bare) else { continue; }; - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if !existing.contains(&key) { to_add.push(CopyrightDetection { copyright: refined, @@ -952,13 +952,13 @@ fn drop_comma_holders_shadowed_by_space_version_same_span(holders: &mut Vec> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .insert(h.holder.clone()); } holders.retain(|h| { - let Some(set) = by_span.get(&(h.start_line, h.end_line)) else { + let Some(set) = by_span.get(&(h.start_line.get(), h.end_line.get())) else { return true; }; if !h.holder.contains(',') { @@ -1019,8 +1019,8 @@ fn 
normalize_company_suffix_period_holder_variants(holders: &mut Vec = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut existing_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for c in copyrights.clone() { @@ -1106,7 +1106,7 @@ fn add_confidential_short_variants_late( let Some(short_c) = refine_copyright(&short_c_raw) else { continue; }; - let key = (c.start_line, c.end_line, short_c.clone()); + let key = (c.start_line.get(), c.end_line.get(), short_c.clone()); if existing_c.insert(key) { copyrights.push(CopyrightDetection { copyright: short_c, @@ -1116,7 +1116,7 @@ fn add_confidential_short_variants_late( } let short_h = "Confidential".to_string(); - let hkey = (c.start_line, c.end_line, short_h.clone()); + let hkey = (c.start_line.get(), c.end_line.get(), short_h.clone()); if existing_h.insert(hkey) { holders.push(HolderDetection { holder: short_h, @@ -1149,7 +1149,7 @@ fn split_multiline_holder_lists_from_copyright_email_sequences( let mut to_remove: HashSet<(usize, usize, String)> = HashSet::new(); for c in copyrights { - if c.end_line <= c.start_line { + if c.end_line.get() <= c.start_line.get() { continue; } @@ -1188,12 +1188,12 @@ fn split_multiline_holder_lists_from_copyright_email_sequences( let mut has_joined_holder = false; for h in holders.iter() { - if h.start_line == c.start_line - && h.end_line == c.end_line + if h.start_line.get() == c.start_line.get() + && h.end_line.get() == c.end_line.get() && normalize_whitespace(&h.holder) == joined { has_joined_holder = true; - to_remove.insert((h.start_line, h.end_line, h.holder.clone())); + to_remove.insert((h.start_line.get(), h.end_line.get(), h.holder.clone())); } } @@ -1202,10 +1202,10 @@ fn 
split_multiline_holder_lists_from_copyright_email_sequences( } for name in split_names { - let key = (c.start_line, c.end_line, name.clone()); + let key = (c.start_line.get(), c.end_line.get(), name.clone()); if !holders .iter() - .any(|h| (h.start_line, h.end_line, h.holder.clone()) == key) + .any(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone()) == key) { to_add.push(HolderDetection { holder: name, @@ -1217,7 +1217,9 @@ fn split_multiline_holder_lists_from_copyright_email_sequences( } if !to_remove.is_empty() { - holders.retain(|h| !to_remove.contains(&(h.start_line, h.end_line, h.holder.clone()))); + holders.retain(|h| { + !to_remove.contains(&(h.start_line.get(), h.end_line.get(), h.holder.clone())) + }); } if !to_add.is_empty() { holders.extend(to_add); @@ -1241,11 +1243,11 @@ fn add_karlsruhe_university_short_variants( let mut existing_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut existing_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for c in copyrights.clone() { @@ -1262,7 +1264,7 @@ fn add_karlsruhe_university_short_variants( if short == c.copyright { continue; } - let key = (c.start_line, c.end_line, short.clone()); + let key = (c.start_line.get(), c.end_line.get(), short.clone()); if existing_c.insert(key) { copyrights.push(CopyrightDetection { copyright: short, @@ -1286,7 +1288,7 @@ fn add_karlsruhe_university_short_variants( if short == h.holder { continue; } - let key = (h.start_line, h.end_line, short.clone()); + let key = (h.start_line.get(), h.end_line.get(), short.clone()); if existing_h.insert(key) { holders.push(HolderDetection { holder: short, @@ -1321,7 +1323,7 @@ fn add_intel_and_sun_non_portions_variants( let mut existing: 
HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); for c in copyrights.clone() { @@ -1333,7 +1335,7 @@ fn add_intel_and_sun_non_portions_variants( let candidate = normalize_whitespace(&format!("Copyright {year} Sun Microsystems{tail}")); if let Some(refined) = refine_copyright(&candidate) { - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if existing.insert(key) { copyrights.push(CopyrightDetection { copyright: refined, @@ -1346,10 +1348,10 @@ fn add_intel_and_sun_non_portions_variants( } if PORTIONS_INTEL_RE.is_match(trimmed) - && (c.end_line > c.start_line || trimmed.contains('(')) + && (c.end_line.get() > c.start_line.get() || trimmed.contains('(')) { let mut joined = String::new(); - for ln in c.start_line..=c.end_line { + for ln in c.start_line.get()..=c.end_line.get() { if let Some(p) = prepared_cache.get(ln) { if !joined.is_empty() { joined.push(' '); @@ -1364,7 +1366,7 @@ fn add_intel_and_sun_non_portions_variants( let candidate = normalize_whitespace(&format!("Copyright 2002 Intel ({emails})")); if let Some(refined) = refine_copyright(&candidate) { - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if existing.insert(key) { copyrights.push(CopyrightDetection { copyright: refined, @@ -1390,7 +1392,7 @@ fn add_first_angle_email_only_variants(copyrights: &mut Vec) let mut existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); for c in copyrights.clone() { @@ -1405,7 +1407,7 @@ fn add_first_angle_email_only_variants(copyrights: &mut Vec) let Some(refined) = refine_copyright(prefix) else { continue; }; - let key = 
(c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if existing.insert(key) { copyrights.push(CopyrightDetection { copyright: refined, @@ -1431,13 +1433,13 @@ fn drop_shadowed_angle_email_prefix_copyrights_same_span(copyrights: &mut Vec> = HashMap::new(); for c in copyrights.iter() { by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .push(c.copyright.clone()); } copyrights.retain(|c| { - let span = (c.start_line, c.end_line); + let span = (c.start_line.get(), c.end_line.get()); let Some(all) = by_span.get(&span) else { return true; }; @@ -1485,7 +1487,7 @@ fn drop_shadowed_quote_before_email_variants_same_span(copyrights: &mut Vec> = HashMap::new(); for c in copyrights.iter() { exact_by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .insert(c.copyright.clone()); } @@ -1494,7 +1496,7 @@ fn drop_shadowed_quote_before_email_variants_same_span(copyrights: &mut Vec) { let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut to_add = Vec::new(); @@ -1563,7 +1565,7 @@ fn add_but_suffix_short_variants(copyrights: &mut Vec) { let Some(refined) = refine_copyright(prefix) else { continue; }; - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if !existing.contains(&key) { to_add.push(CopyrightDetection { copyright: refined, @@ -1586,11 +1588,11 @@ fn add_at_affiliation_short_variants( let existing_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let existing_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| 
(h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); let mut to_add_c = Vec::new(); @@ -1605,7 +1607,7 @@ fn add_at_affiliation_short_variants( let Some(refined) = refine_copyright(head) else { continue; }; - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if !existing_c.contains(&key) { to_add_c.push(CopyrightDetection { copyright: refined, @@ -1631,7 +1633,7 @@ fn add_at_affiliation_short_variants( let Some(refined) = refine_holder_in_copyright_context(head) else { continue; }; - let key = (h.start_line, h.end_line, refined.clone()); + let key = (h.start_line.get(), h.end_line.get(), refined.clone()); if !existing_h.contains(&key) { to_add_h.push(HolderDetection { holder: refined, @@ -1654,22 +1656,22 @@ fn add_missing_copyrights_for_holder_lines_with_emails( let mut copyright_lines: HashSet = HashSet::new(); for c in copyrights.iter() { - if c.start_line == c.end_line { - copyright_lines.insert(c.start_line); + if c.start_line.get() == c.end_line.get() { + copyright_lines.insert(c.start_line.get()); } } let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut to_add = Vec::new(); for h in holders.iter() { - if h.start_line != h.end_line { + if h.start_line.get() != h.end_line.get() { continue; } - let ln = h.start_line; + let ln = h.start_line.get(); if ln == 0 || ln > prepared_cache.len() { continue; } @@ -1702,8 +1704,8 @@ fn add_missing_copyrights_for_holder_lines_with_emails( } to_add.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } copyrights.extend(to_add); @@ -1727,7 +1729,7 @@ fn 
extend_inline_obfuscated_angle_email_suffixes( }); for c in copyrights.iter_mut() { - if c.start_line == 0 || c.end_line == 0 || c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { continue; } if c.copyright.to_ascii_lowercase().contains(" at ") @@ -1736,7 +1738,7 @@ fn extend_inline_obfuscated_angle_email_suffixes( continue; } - let ln = c.start_line; + let ln = c.start_line.get(); let Some(refined_line) = refined_line_cache .entry(ln) .or_insert_with(|| { @@ -1831,7 +1833,7 @@ fn strip_lone_obfuscated_angle_email_user_tokens( for c in copyrights .iter_mut() - .filter(|c| c.start_line == ln && c.end_line == ln) + .filter(|c| c.start_line.get() == ln && c.end_line.get() == ln) { let lower = c.copyright.to_ascii_lowercase(); if lower.contains(" at ") || lower.contains(" dot ") { @@ -1849,7 +1851,7 @@ fn strip_lone_obfuscated_angle_email_user_tokens( for h in holders .iter_mut() - .filter(|h| h.start_line == ln && h.end_line == ln) + .filter(|h| h.start_line.get() == ln && h.end_line.get() == ln) { let lower = h.holder.to_ascii_lowercase(); if lower.contains(" at ") || lower.contains(" dot ") { @@ -1884,7 +1886,7 @@ fn add_at_domain_variants_for_short_net_angle_emails( let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut to_add = Vec::new(); @@ -1903,7 +1905,7 @@ fn add_at_domain_variants_for_short_net_angle_emails( let Some(refined) = refine_copyright(&replaced) else { continue; }; - let key = (c.start_line, c.end_line, refined.clone()); + let key = (c.start_line.get(), c.end_line.get(), refined.clone()); if existing.contains(&key) { continue; } @@ -1929,7 +1931,7 @@ fn drop_shadowed_plain_email_prefix_copyrights_same_span(copyrights: &mut Vec> = HashMap::new(); for c in copyrights.iter() { by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), 
c.end_line.get())) .or_default() .push(c.copyright.clone()); } @@ -1960,7 +1962,9 @@ fn drop_shadowed_plain_email_prefix_copyrights_same_span(copyrights: &mut Vec = HashSet::new(); for c in copyrights.iter() { - if c.end_line <= c.start_line { + if c.end_line.get() <= c.start_line.get() { continue; } let Some(cap) = YEARS_EMAIL_RE.captures(c.copyright.trim()) else { @@ -1994,7 +1998,7 @@ fn drop_single_line_copyrights_shadowed_by_multiline_same_start( if years_norm.is_empty() || email.is_empty() { continue; } - multi_keys.insert((c.start_line, years_norm, email.to_ascii_lowercase())); + multi_keys.insert((c.start_line.get(), years_norm, email.to_ascii_lowercase())); } if multi_keys.is_empty() { @@ -2002,10 +2006,7 @@ fn drop_single_line_copyrights_shadowed_by_multiline_same_start( } copyrights.retain(|c| { - if c.start_line == 0 { - return true; - } - if c.end_line != c.start_line { + if c.end_line.get() != c.start_line.get() { return true; } let Some(cap) = YEARS_EMAIL_RE.captures(c.copyright.trim()) else { @@ -2020,7 +2021,7 @@ fn drop_single_line_copyrights_shadowed_by_multiline_same_start( if years_norm.is_empty() || email.is_empty() { return true; } - !multi_keys.contains(&(c.start_line, years_norm, email.to_ascii_lowercase())) + !multi_keys.contains(&(c.start_line.get(), years_norm, email.to_ascii_lowercase())) }); } @@ -2038,11 +2039,11 @@ fn normalize_french_support_disclaimer_copyrights( let existing_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let existing_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); let mut to_add_c = Vec::new(); @@ -2060,7 +2061,7 @@ fn normalize_french_support_disclaimer_copyrights( let Some(short) = refine_copyright(short_raw) else { 
continue; }; - let ckey = (c.start_line, c.end_line, short.clone()); + let ckey = (c.start_line.get(), c.end_line.get(), short.clone()); if !existing_c.contains(&ckey) { to_add_c.push(CopyrightDetection { copyright: short, @@ -2071,7 +2072,7 @@ fn normalize_french_support_disclaimer_copyrights( let Some(refined_email) = refine_holder_in_copyright_context(email) else { continue; }; - let hkey = (c.start_line, c.end_line, refined_email.clone()); + let hkey = (c.start_line.get(), c.end_line.get(), refined_email.clone()); if !existing_h.contains(&hkey) { to_add_h.push(HolderDetection { holder: refined_email, @@ -2111,13 +2112,13 @@ fn drop_shadowed_email_org_location_suffixes_same_span( let mut exact_c_by_span: HashMap<(usize, usize), HashSet> = HashMap::new(); for c in copyrights.iter() { exact_c_by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .insert(c.copyright.clone()); } copyrights.retain(|c| { - let span = (c.start_line, c.end_line); + let span = (c.start_line.get(), c.end_line.get()); let Some(set) = exact_c_by_span.get(&span) else { return true; }; @@ -2138,7 +2139,7 @@ fn drop_shadowed_email_org_location_suffixes_same_span( let mut exact_h_by_span: HashMap<(usize, usize), HashSet> = HashMap::new(); for h in holders.iter() { exact_h_by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .insert(h.holder.clone()); } @@ -2155,9 +2156,9 @@ fn drop_shadowed_email_org_location_suffixes_same_span( let Some(refined_email) = refine_holder_in_copyright_context(email) else { continue; }; - let key = (h.start_line, h.end_line, refined_email.clone()); + let key = (h.start_line.get(), h.end_line.get(), refined_email.clone()); if exact_h_by_span - .get(&(h.start_line, h.end_line)) + .get(&(h.start_line.get(), h.end_line.get())) .is_some_and(|set| set.contains(&refined_email)) { continue; @@ -2168,14 +2169,14 @@ fn 
drop_shadowed_email_org_location_suffixes_same_span( end_line: h.end_line, }); exact_h_by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .insert(key.2); } holders.extend(to_add_h); holders.retain(|h| { - let span = (h.start_line, h.end_line); + let span = (h.start_line.get(), h.end_line.get()); let Some(set) = exact_h_by_span.get(&span) else { return true; }; @@ -2207,7 +2208,7 @@ fn add_pipe_read_parenthetical_variants( let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); for i in 0..prepared_cache.len().saturating_sub(1) { @@ -2236,8 +2237,8 @@ fn add_pipe_read_parenthetical_variants( if !existing.contains(&key) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } } @@ -2256,7 +2257,7 @@ fn add_from_url_parenthetical_copyright_variants( let existing: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); for i in 0..prepared_cache.len() { @@ -2282,8 +2283,8 @@ fn add_from_url_parenthetical_copyright_variants( if !existing.contains(&key) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -2303,13 +2304,13 @@ fn drop_shadowed_acronym_location_suffix_copyrights_same_span( let mut by_span: HashMap<(usize, usize), HashSet> = HashMap::new(); for c in copyrights.iter() { by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .insert(c.copyright.clone()); } copyrights.retain(|c| { - 
let span = (c.start_line, c.end_line); + let span = (c.start_line.get(), c.end_line.get()); let Some(set) = by_span.get(&span) else { return true; }; @@ -2356,10 +2357,12 @@ fn drop_json_description_metadata_copyrights_and_holders( let mut retained_spans: HashSet<(usize, usize)> = HashSet::new(); copyrights.retain(|copyright| { - let Some(window) = - json_window_for_span(raw_lines, copyright.start_line, copyright.end_line) - else { - retained_spans.insert((copyright.start_line, copyright.end_line)); + let Some(window) = json_window_for_span( + raw_lines, + copyright.start_line.get(), + copyright.end_line.get(), + ) else { + retained_spans.insert((copyright.start_line.get(), copyright.end_line.get())); return true; }; @@ -2371,16 +2374,17 @@ fn drop_json_description_metadata_copyrights_and_holders( || lower.contains("\"url\""); let keep = !description_like || JSON_COPYRIGHT_KEY_RE.is_match(&window); if keep { - retained_spans.insert((copyright.start_line, copyright.end_line)); + retained_spans.insert((copyright.start_line.get(), copyright.end_line.get())); } keep }); holders.retain(|holder| { - if retained_spans.contains(&(holder.start_line, holder.end_line)) { + if retained_spans.contains(&(holder.start_line.get(), holder.end_line.get())) { return true; } - let Some(window) = json_window_for_span(raw_lines, holder.start_line, holder.end_line) + let Some(window) = + json_window_for_span(raw_lines, holder.start_line.get(), holder.end_line.get()) else { return true; }; @@ -2441,7 +2445,7 @@ fn restore_url_slash_before_closing_paren_from_raw_lines( } for c in copyrights.iter_mut() { - for ln in c.start_line..=c.end_line { + for ln in c.start_line.get()..=c.end_line.get() { let Some(pairs) = replacements.get(&ln) else { continue; }; @@ -2555,30 +2559,30 @@ fn extract_mso_document_properties_copyrights( let ckey = (desc_line, end_line, copy_refined.clone()); if !copyrights .iter() - .any(|c| (c.start_line, c.end_line, c.copyright.clone()) == ckey) + .any(|c| 
(c.start_line.get(), c.end_line.get(), c.copyright.clone()) == ckey) { copyrights.push(CopyrightDetection { copyright: copy_refined, - start_line: desc_line, - end_line, + start_line: LineNumber::new(desc_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } let hkey = (desc_line, end_line, holder_refined.clone()); if !holders .iter() - .any(|h| (h.start_line, h.end_line, h.holder.clone()) == hkey) + .any(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone()) == hkey) { holders.push(HolderDetection { holder: holder_refined, - start_line: desc_line, - end_line, + start_line: LineNumber::new(desc_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } } let plain = format!("Copyright {year}"); copyrights.retain(|c| { - !(c.start_line == desc_line && c.end_line == desc_line && c.copyright == plain) + !(c.start_line.get() == desc_line && c.end_line.get() == desc_line && c.copyright == plain) }); let shadow_non_confidential = normalize_whitespace(&format!("{last_author} Copyright {year}")); @@ -2591,24 +2595,25 @@ fn extract_mso_document_properties_copyrights( let short_c = format!("Copyright {year} Confidential"); let short_h = "Confidential".to_string(); if let Some(rc) = refine_copyright(&short_c) - && !copyrights - .iter() - .any(|c| c.start_line == desc_line && c.end_line == desc_line && c.copyright == rc) + && !copyrights.iter().any(|c| { + c.start_line.get() == desc_line + && c.end_line.get() == desc_line + && c.copyright == rc + }) { copyrights.push(CopyrightDetection { copyright: rc, - start_line: desc_line, - end_line: desc_line, + start_line: LineNumber::new(desc_line).expect("valid"), + end_line: LineNumber::new(desc_line).expect("valid"), }); } - if !holders - .iter() - .any(|h| h.start_line == desc_line && h.end_line == desc_line && h.holder == short_h) - { + if !holders.iter().any(|h| { + h.start_line.get() == desc_line && h.end_line.get() == desc_line && h.holder == short_h + }) { 
holders.push(HolderDetection { holder: short_h, - start_line: desc_line, - end_line: desc_line, + start_line: LineNumber::new(desc_line).expect("valid"), + end_line: LineNumber::new(desc_line).expect("valid"), }); } } @@ -2665,14 +2670,14 @@ fn expand_year_only_copyrights_with_by_name_prefix( let mut seen_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for c in copyrights.iter_mut() { if !YEAR_ONLY_COPY_RE.is_match(c.copyright.trim()) { continue; } - let Some(line) = prepared_cache.get(c.start_line) else { + let Some(line) = prepared_cache.get(c.start_line.get()) else { continue; }; let Some(cap) = BY_NAME_RE.captures(line) else { @@ -2699,7 +2704,7 @@ fn expand_year_only_copyrights_with_by_name_prefix( c.copyright = format!("{name}, Copyright (c) {year}"); if let Some(h) = refine_holder_in_copyright_context(name) { - let key = (c.start_line, c.end_line, h.clone()); + let key = (c.start_line.get(), c.end_line.get(), h.clone()); if seen_h.insert(key) { holders.push(HolderDetection { holder: h, @@ -2725,11 +2730,11 @@ fn expand_year_only_copyrights_with_read_the_suffix( let mut seen_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); let current = copyrights.clone(); @@ -2737,13 +2742,13 @@ fn expand_year_only_copyrights_with_read_the_suffix( let mut new_holders = Vec::new(); for c in current.iter() { - if c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { continue; } if !YEAR_ONLY_RE.is_match(c.copyright.trim()) { continue; } - let Some(next_line) = 
prepared_cache.get(c.end_line + 1) else { + let Some(next_line) = prepared_cache.get(c.end_line.get() + 1) else { continue; }; let next_trim = next_line.trim(); @@ -2755,7 +2760,7 @@ fn expand_year_only_copyrights_with_read_the_suffix( let Some(refined) = refine_copyright(&raw) else { continue; }; - let key = (c.start_line, c.end_line + 1, refined.clone()); + let key = (c.start_line.get(), c.end_line.get() + 1, refined.clone()); if seen_c.insert(key) { new_copyrights.push(CopyrightDetection { copyright: refined, @@ -2764,7 +2769,7 @@ fn expand_year_only_copyrights_with_read_the_suffix( }); } if let Some(h) = refine_holder_in_copyright_context(tail) { - let hkey = (c.end_line + 1, c.end_line + 1, h.clone()); + let hkey = (c.end_line.get() + 1, c.end_line.get() + 1, h.clone()); if seen_h.insert(hkey) { new_holders.push(HolderDetection { holder: h, @@ -2837,9 +2842,9 @@ fn merge_multiline_obfuscated_name_year_copyright_pairs( }; let mut updated = false; for c in copyrights.iter_mut() { - if c.start_line == ln1 && c.end_line == ln1 && c.copyright.contains(name1) { + if c.start_line.get() == ln1 && c.end_line.get() == ln1 && c.copyright.contains(name1) { c.copyright = refined.clone(); - c.end_line = ln2; + c.end_line = LineNumber::new(ln2).expect("valid"); updated = true; break; } @@ -2847,26 +2852,26 @@ fn merge_multiline_obfuscated_name_year_copyright_pairs( if !updated { copyrights.push(CopyrightDetection { copyright: refined.clone(), - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } let combined_holder_raw = format!("{name1}, {name2}"); if let Some(h) = refine_holder_in_copyright_context(&combined_holder_raw) { holders.retain(|x| { - !(x.start_line == ln1 - && x.end_line == ln1 + !(x.start_line.get() == ln1 + && x.end_line.get() == ln1 && (x.holder == name1 || x.holder.contains(name1))) }); if !holders .iter() - .any(|x| x.start_line == ln1 && x.end_line == ln2 && x.holder 
== h) + .any(|x| x.start_line.get() == ln1 && x.end_line.get() == ln2 && x.holder == h) { holders.push(HolderDetection { holder: h, - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } } @@ -2893,7 +2898,7 @@ fn extend_copyrights_with_next_line_parenthesized_obfuscated_email( }); for c in copyrights.iter_mut() { - if c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { continue; } @@ -2905,7 +2910,7 @@ fn extend_copyrights_with_next_line_parenthesized_obfuscated_email( continue; } - let prepared_next = match prepared_cache.get(c.end_line + 1) { + let prepared_next = match prepared_cache.get(c.end_line.get() + 1) { Some(p) => p, None => continue, }; @@ -2920,7 +2925,7 @@ fn extend_copyrights_with_next_line_parenthesized_obfuscated_email( }; c.copyright = refined; - c.end_line += 1; + c.end_line += 1usize; } } @@ -2938,7 +2943,7 @@ fn extend_copyrights_with_following_all_rights_reserved_line( LazyLock::new(|| Regex::new(r"(?i)^\s*(?:copyright\b|\(c\))").unwrap()); for c in copyrights.iter_mut() { - if c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { continue; } @@ -2952,7 +2957,7 @@ fn extend_copyrights_with_following_all_rights_reserved_line( continue; } - let Some(next_raw) = raw_lines.get(c.end_line) else { + let Some(next_raw) = raw_lines.get(c.end_line.get()) else { continue; }; let next_trim = next_raw.trim().trim_start_matches('*').trim_start(); @@ -2968,7 +2973,7 @@ fn extend_copyrights_with_following_all_rights_reserved_line( } else { merged_normalized }; - c.end_line += 1; + c.end_line += 1usize; } } @@ -2982,11 +2987,11 @@ fn add_modify_suffix_holders( let mut existing: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for h in holders.clone() { - let idx = h.end_line + 1; + let idx = 
h.end_line.get() + 1; let Some(next) = prepared_cache.get(idx) else { continue; }; @@ -3008,7 +3013,7 @@ fn add_modify_suffix_holders( continue; } let combined = normalize_whitespace(&format!("{} {t}", h.holder)); - let key = (h.start_line, h.end_line + 1, combined.clone()); + let key = (h.start_line.get(), h.end_line.get() + 1, combined.clone()); if existing.insert(key) { holders.push(HolderDetection { holder: combined, @@ -3039,7 +3044,7 @@ fn drop_shadowed_c_sign_variants(copyrights: &mut Vec) { for c in copyrights.iter() { if contains_c_sign(&c.copyright) { with_c_by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .insert(canonical_without_c_sign(&c.copyright)); } @@ -3052,7 +3057,7 @@ fn drop_shadowed_c_sign_variants(copyrights: &mut Vec) { if contains_c_sign(&c.copyright) { return true; } - let Some(set) = with_c_by_span.get(&(c.start_line, c.end_line)) else { + let Some(set) = with_c_by_span.get(&(c.start_line.get(), c.end_line.get())) else { return true; }; let canon = canonical_without_c_sign(&c.copyright); @@ -3070,7 +3075,7 @@ fn drop_shadowed_year_prefixed_holders(holders: &mut Vec) { let mut by_span: HashMap<(usize, usize), HashSet> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .insert(normalize_whitespace(&h.holder)); } @@ -3091,7 +3096,7 @@ fn drop_shadowed_year_prefixed_holders(holders: &mut Vec) { } holders.retain(|h| { - let Some(set) = by_span.get(&(h.start_line, h.end_line)) else { + let Some(set) = by_span.get(&(h.start_line.get(), h.end_line.get())) else { return true; }; let normalized = normalize_whitespace(&h.holder); @@ -3115,7 +3120,7 @@ fn drop_shadowed_for_clause_holders_with_email_copyrights( let mut spans_with_email: HashSet<(usize, usize)> = HashSet::new(); for c in copyrights { if c.copyright.contains('@') { - spans_with_email.insert((c.start_line, c.end_line)); + 
spans_with_email.insert((c.start_line.get(), c.end_line.get())); } } if spans_with_email.is_empty() { @@ -3125,13 +3130,13 @@ fn drop_shadowed_for_clause_holders_with_email_copyrights( let mut by_span: HashMap<(usize, usize), Vec> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .push(h.holder.clone()); } holders.retain(|h| { - let span = (h.start_line, h.end_line); + let span = (h.start_line.get(), h.end_line.get()); if !spans_with_email.contains(&span) { return true; } @@ -3177,11 +3182,11 @@ fn drop_shadowed_multiline_prefix_copyrights(copyrights: &mut Vec = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); copyrights.retain(|c| { - if c.start_line != c.end_line { + if c.start_line.get() != c.end_line.get() { return true; } let short = c.copyright.as_str(); @@ -3190,8 +3195,8 @@ fn drop_shadowed_multiline_prefix_copyrights(copyrights: &mut Vec c.end_line + *s == c.start_line.get() + && *e > c.end_line.get() && other.len() > short.len() && other.starts_with(short) && other @@ -3209,11 +3214,11 @@ fn drop_shadowed_multiline_prefix_holders(holders: &mut Vec) { let all: Vec<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); holders.retain(|h| { - if h.start_line != h.end_line { + if h.start_line.get() != h.end_line.get() { return true; } let short = h.holder.as_str(); @@ -3222,8 +3227,8 @@ fn drop_shadowed_multiline_prefix_holders(holders: &mut Vec) { } !all.iter().any(|(s, e, other)| { - *s == h.start_line - && *e > h.end_line + *s == h.start_line.get() + && *e > h.end_line.get() && other.len() > short.len() && other.starts_with(short) && { @@ -3252,7 +3257,7 @@ fn replace_holders_with_embedded_c_year_markers( let mut to_add: 
Vec = Vec::new(); let mut seen: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); holders.retain(|h| { @@ -3260,12 +3265,11 @@ fn replace_holders_with_embedded_c_year_markers( return true; } - for c in copyrights - .iter() - .filter(|c| c.start_line == h.start_line && c.end_line == h.end_line) - { + for c in copyrights.iter().filter(|c| { + c.start_line.get() == h.start_line.get() && c.end_line.get() == h.end_line.get() + }) { if let Some(derived) = derive_holder_from_simple_copyright_string(&c.copyright) { - let key = (h.start_line, h.end_line, derived.clone()); + let key = (h.start_line.get(), h.end_line.get(), derived.clone()); if seen.insert(key) { to_add.push(HolderDetection { holder: derived, @@ -3303,7 +3307,7 @@ fn extend_year_only_copyrights_with_trailing_text( let mut seen_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for c in copyrights.iter_mut() { @@ -3311,7 +3315,7 @@ fn extend_year_only_copyrights_with_trailing_text( continue; } - let Some(prepared) = prepared_cache.get(c.start_line) else { + let Some(prepared) = prepared_cache.get(c.start_line.get()) else { continue; }; let line = prepared.trim(); @@ -3335,7 +3339,7 @@ fn extend_year_only_copyrights_with_trailing_text( c.copyright = refined.clone(); if let Some(h) = refine_holder_in_copyright_context(tail) { - let key = (c.start_line, c.end_line, h.clone()); + let key = (c.start_line.get(), c.end_line.get(), h.clone()); if seen_h.insert(key) { holders.push(HolderDetection { holder: h, @@ -3365,11 +3369,11 @@ fn extract_licensed_material_of_company_bare_c_year_lines( let mut seen_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| 
(c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for ln in 1..=prepared_cache.raw_line_count() { @@ -3398,8 +3402,8 @@ fn extract_licensed_material_of_company_bare_c_year_lines( if seen_c.insert(ckey) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -3408,14 +3412,16 @@ fn extract_licensed_material_of_company_bare_c_year_lines( if seen_h.insert(hkey) { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } copyrights.retain(|c| { - !(c.start_line == ln && c.end_line == ln && c.copyright == format!("(c) {year}")) + !(c.start_line.get() == ln + && c.end_line.get() == ln + && c.copyright == format!("(c) {year}")) }); } } @@ -3444,11 +3450,11 @@ fn merge_year_only_copyrights_with_following_author_colon_lines( let mut seen_c: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); for i in 1..prepared_cache.raw_line_count() { @@ -3457,7 +3463,7 @@ fn merge_year_only_copyrights_with_following_author_colon_lines( let Some(prev) = copyrights .iter() - .find(|c| c.start_line == ln1 && c.end_line == ln1) + .find(|c| c.start_line.get() == ln1 && c.end_line.get() == ln1) else { continue; }; @@ -3501,8 +3507,8 @@ fn 
merge_year_only_copyrights_with_following_author_colon_lines( if seen_c.insert(ckey) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } if let Some(h) = refine_holder_in_copyright_context(name) { @@ -3510,15 +3516,15 @@ fn merge_year_only_copyrights_with_following_author_colon_lines( if seen_h.insert(hkey) { holders.push(HolderDetection { holder: h, - start_line: ln1, - end_line: ln2, + start_line: LineNumber::new(ln1).expect("valid"), + end_line: LineNumber::new(ln2).expect("valid"), }); } } copyrights.retain(|c| { - !(c.start_line == ln1 - && c.end_line == ln1 + !(c.start_line.get() == ln1 + && c.end_line.get() == ln1 && YEAR_ONLY_COPY_RE.is_match(c.copyright.as_str())) }); } @@ -3539,11 +3545,11 @@ fn extract_question_mark_year_copyrights( let mut seen_c: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for ln in 1..=prepared_cache.len() { @@ -3567,8 +3573,8 @@ fn extract_question_mark_year_copyrights( if seen_c.insert((ln, cr.clone())) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -3578,8 +3584,8 @@ fn extract_question_mark_year_copyrights( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -3726,11 +3732,11 @@ fn extend_copyrights_with_authors_blocks( for c in copyrights .iter_mut() - .filter(|c| c.start_line == ln && c.end_line == ln) + .filter(|c| c.start_line.get() == ln && 
c.end_line.get() == ln) { if c.copyright.starts_with("Copyright") || c.copyright.starts_with("(c)") { c.copyright = extended.clone(); - c.end_line = end_ln; + c.end_line = LineNumber::new(end_ln).expect("valid"); } } @@ -3739,12 +3745,12 @@ fn extend_copyrights_with_authors_blocks( { holders.push(HolderDetection { holder: h.clone(), - start_line: ln, - end_line: end_ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(end_ln).expect("valid"), }); holders.retain(|hh| { - if hh.start_line != ln || hh.end_line != ln { + if hh.start_line.get() != ln || hh.end_line.get() != ln { return true; } if hh.holder == h { @@ -3767,14 +3773,14 @@ fn drop_wider_duplicate_holder_spans(holders: &mut Vec) { by_text .entry(h.holder.clone()) .or_default() - .push((h.start_line, h.end_line)); + .push((h.start_line.get(), h.end_line.get())); } holders.retain(|h| { let Some(spans) = by_text.get(&h.holder) else { return true; }; - let (s, e) = (h.start_line, h.end_line); + let (s, e) = (h.start_line.get(), h.end_line.get()); !spans .iter() .any(|(os, oe)| (*os, *oe) != (s, e) && *os >= s && *oe <= e && (*os > s || *oe < e)) @@ -3819,8 +3825,8 @@ fn apply_openoffice_org_report_builder_bin_normalizations( if let Some(cr) = refine_copyright(want_cr) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -3829,8 +3835,8 @@ fn apply_openoffice_org_report_builder_bin_normalizations( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -3888,16 +3894,16 @@ fn extract_midline_c_year_holder_with_leading_acronym( copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); let holder_raw = format!("{holder} {prefix}"); 
if let Some(h) = refine_holder_in_copyright_context(&holder_raw) { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -3908,7 +3914,7 @@ fn dedupe_exact_span_copyrights(copyrights: &mut Vec) { return; } let mut seen: HashSet<(usize, usize, String)> = HashSet::new(); - copyrights.retain(|c| seen.insert((c.start_line, c.end_line, c.copyright.clone()))); + copyrights.retain(|c| seen.insert((c.start_line.get(), c.end_line.get(), c.copyright.clone()))); } fn dedupe_exact_span_holders(holders: &mut Vec) { @@ -3916,7 +3922,7 @@ fn dedupe_exact_span_holders(holders: &mut Vec) { return; } let mut seen: HashSet<(usize, usize, String)> = HashSet::new(); - holders.retain(|h| seen.insert((h.start_line, h.end_line, h.holder.clone()))); + holders.retain(|h| seen.insert((h.start_line.get(), h.end_line.get(), h.holder.clone()))); } fn dedupe_exact_span_authors(authors: &mut Vec) { @@ -3924,7 +3930,7 @@ fn dedupe_exact_span_authors(authors: &mut Vec) { return; } let mut seen: HashSet<(usize, usize, String)> = HashSet::new(); - authors.retain(|a| seen.insert((a.start_line, a.end_line, a.author.clone()))); + authors.retain(|a| seen.insert((a.start_line.get(), a.end_line.get(), a.author.clone()))); } fn drop_shadowed_prefix_bare_c_copyrights_same_span(copyrights: &mut Vec) { @@ -3937,13 +3943,13 @@ fn drop_shadowed_prefix_bare_c_copyrights_same_span(copyrights: &mut Vec> = HashMap::new(); for c in copyrights.iter() { by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .push(c.copyright.clone()); } copyrights.retain(|c| { - let Some(group) = by_span.get(&(c.start_line, c.end_line)) else { + let Some(group) = by_span.get(&(c.start_line.get(), c.end_line.get())) else { return true; }; let short = c.copyright.trim(); @@ -3975,13 +3981,13 @@ fn drop_shadowed_acronym_extended_holders(holders: &mut Vec) { let mut 
by_span: HashMap<(usize, usize), Vec> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .push(h.holder.clone()); } holders.retain(|h| { - let Some(group) = by_span.get(&(h.start_line, h.end_line)) else { + let Some(group) = by_span.get(&(h.start_line.get(), h.end_line.get())) else { return true; }; let candidate = h.holder.trim(); @@ -4123,21 +4129,21 @@ fn extend_multiline_copyright_c_no_year_names( for c in copyrights .iter_mut() - .filter(|c| c.start_line == *start_ln && !YEAR_RE.is_match(c.copyright.as_str())) + .filter(|c| c.start_line.get() == *start_ln && !YEAR_RE.is_match(c.copyright.as_str())) { if refined.len() > c.copyright.len() && refined.starts_with(&c.copyright) { c.copyright = refined.clone(); - c.end_line = end_ln; + c.end_line = LineNumber::new(end_ln).expect("valid"); } } for h in holders .iter_mut() - .filter(|h| h.start_line == *start_ln && !YEAR_RE.is_match(h.holder.as_str())) + .filter(|h| h.start_line.get() == *start_ln && !YEAR_RE.is_match(h.holder.as_str())) { if refined_holder.len() > h.holder.len() && refined_holder.starts_with(&h.holder) { h.holder = refined_holder.clone(); - h.end_line = end_ln; + h.end_line = LineNumber::new(end_ln).expect("valid"); } } } @@ -4241,22 +4247,27 @@ fn extend_multiline_copyright_c_year_holder_continuations( }; let mut updated_copyright = false; - for c in copyrights.iter_mut().filter(|c| c.start_line == *start_ln) { + for c in copyrights + .iter_mut() + .filter(|c| c.start_line.get() == *start_ln) + { if refined.len() > c.copyright.len() && refined.starts_with(&c.copyright) { c.copyright = refined.clone(); - c.end_line = end_ln; + c.end_line = LineNumber::new(end_ln).expect("valid"); updated_copyright = true; } } if !updated_copyright && !copyrights.iter().any(|c| { - c.start_line == *start_ln && c.end_line == end_ln && c.copyright == refined + c.start_line.get() == *start_ln + && c.end_line.get() == 
end_ln + && c.copyright == refined }) { copyrights.push(CopyrightDetection { copyright: refined.clone(), - start_line: *start_ln, - end_line: end_ln, + start_line: LineNumber::new(*start_ln).expect("invalid line number"), + end_line: LineNumber::new(end_ln).expect("valid"), }); } @@ -4265,22 +4276,27 @@ fn extend_multiline_copyright_c_year_holder_continuations( }; let mut updated_holder = false; - for h in holders.iter_mut().filter(|h| h.start_line == *start_ln) { + for h in holders + .iter_mut() + .filter(|h| h.start_line.get() == *start_ln) + { if refined_holder.len() > h.holder.len() && refined_holder.starts_with(&h.holder) { h.holder = refined_holder.clone(); - h.end_line = end_ln; + h.end_line = LineNumber::new(end_ln).expect("valid"); updated_holder = true; } } if !updated_holder && !holders.iter().any(|h| { - h.start_line == *start_ln && h.end_line == end_ln && h.holder == refined_holder + h.start_line.get() == *start_ln + && h.end_line.get() == end_ln + && h.holder == refined_holder }) { holders.push(HolderDetection { holder: refined_holder, - start_line: *start_ln, - end_line: end_ln, + start_line: LineNumber::new(*start_ln).expect("invalid line number"), + end_line: LineNumber::new(end_ln).expect("valid"), }); } } @@ -4334,18 +4350,18 @@ fn extend_authors_see_url_copyrights( }; let refined_holder = derive_holder_from_simple_copyright_string(&format!("{prefix} see")); - for c in copyrights.iter_mut().filter(|c| c.start_line == *ln1) { + for c in copyrights.iter_mut().filter(|c| c.start_line.get() == *ln1) { if refined.len() > c.copyright.len() && refined.starts_with(&c.copyright) { c.copyright = refined.clone(); - c.end_line = *ln2; + c.end_line = LineNumber::new(*ln2).expect("valid"); } } if let Some(refined_holder) = refined_holder { - for h in holders.iter_mut().filter(|h| h.start_line == *ln1) { + for h in holders.iter_mut().filter(|h| h.start_line.get() == *ln1) { if refined_holder.len() > h.holder.len() && refined_holder.starts_with(&h.holder) { 
h.holder = refined_holder.clone(); - h.end_line = *ln2; + h.end_line = LineNumber::new(*ln2).expect("valid"); } } } @@ -4368,7 +4384,7 @@ fn extend_leading_dash_suffixes( continue; } - let has_copyright_on_line1 = copyrights.iter().any(|c| c.start_line == *ln1); + let has_copyright_on_line1 = copyrights.iter().any(|c| c.start_line.get() == *ln1); if !has_copyright_on_line1 { continue; } @@ -4404,7 +4420,7 @@ fn extend_leading_dash_suffixes( let suffix = format!("- {tail}"); - for c in copyrights.iter_mut().filter(|c| c.start_line == *ln1) { + for c in copyrights.iter_mut().filter(|c| c.start_line.get() == *ln1) { if c.copyright.contains(&suffix) { continue; } @@ -4413,21 +4429,21 @@ fn extend_leading_dash_suffixes( .filter(|r| r.contains(tail)) .unwrap_or(extended); c.copyright = refined; - c.end_line = *ln2; + c.end_line = LineNumber::new(*ln2).expect("valid"); } let derived: Option = copyrights .iter() - .find(|c| c.start_line == *ln1) + .find(|c| c.start_line.get() == *ln1) .and_then(|c| derive_holder_from_simple_copyright_string(&c.copyright)); - for h in holders.iter_mut().filter(|h| h.start_line == *ln1) { + for h in holders.iter_mut().filter(|h| h.start_line.get() == *ln1) { if let Some(ref d) = derived && d.len() >= h.holder.len() && d.contains(tail) { h.holder = d.clone(); - h.end_line = *ln2; + h.end_line = LineNumber::new(*ln2).expect("valid"); continue; } @@ -4440,10 +4456,10 @@ fn extend_leading_dash_suffixes( && refined.contains(tail) { h.holder = refined; - h.end_line = *ln2; + h.end_line = LineNumber::new(*ln2).expect("valid"); } else { h.holder = extended; - h.end_line = *ln2; + h.end_line = LineNumber::new(*ln2).expect("valid"); } } } @@ -4468,12 +4484,12 @@ fn extend_dash_obfuscated_email_suffixes( }); for (ln, _) in group { - if !copyrights.iter().any(|c| c.start_line == *ln) { + if !copyrights.iter().any(|c| c.start_line.get() == *ln) { continue; } let has_named_holder = holders.iter().any(|h| { - h.start_line == *ln + h.start_line.get() == *ln 
&& !h.holder.to_ascii_lowercase().contains(" at ") && !h.holder.to_ascii_lowercase().contains(" dot ") }); @@ -4496,7 +4512,7 @@ fn extend_dash_obfuscated_email_suffixes( let obfuscated = format!("{user} at {host} dot {tld}"); - for c in copyrights.iter_mut().filter(|c| c.start_line == *ln) { + for c in copyrights.iter_mut().filter(|c| c.start_line.get() == *ln) { if c.copyright.contains(&obfuscated) { continue; } @@ -4527,7 +4543,7 @@ fn extend_trailing_copy_year_suffixes( for c in copyrights .iter_mut() - .filter(|c| c.start_line == *ln && c.end_line == *ln) + .filter(|c| c.start_line.get() == *ln && c.end_line.get() == *ln) { let lower = c.copyright.to_ascii_lowercase(); if !lower.starts_with("copyright") { @@ -4566,27 +4582,29 @@ fn extend_w3c_registered_org_list_suffixes( for c in copyrights .iter_mut() - .filter(|c| c.start_line == *ln || c.start_line + 1 == *ln) + .filter(|c| c.start_line.get() == *ln || c.start_line.get() + 1 == *ln) { if c.copyright.contains(&full) { continue; } if c.copyright.contains("W3C(r)") { c.copyright = c.copyright.replace("W3C(r)", &full); - c.end_line = c.end_line.max(*ln); + c.end_line = + LineNumber::new(c.end_line.get().max(*ln)).expect("invalid line number"); } } for h in holders .iter_mut() - .filter(|h| h.start_line == *ln || h.start_line + 1 == *ln) + .filter(|h| h.start_line.get() == *ln || h.start_line.get() + 1 == *ln) { if h.holder.contains(&full) { continue; } if h.holder == "W3C(r)" { h.holder = full.clone(); - h.end_line = h.end_line.max(*ln); + h.end_line = + LineNumber::new(h.end_line.get().max(*ln)).expect("invalid line number"); } } } @@ -4612,7 +4630,9 @@ fn drop_symbol_year_only_copyrights(content: &str, copyrights: &mut Vec = copyrights.iter().map(|c| c.copyright.clone()).collect(); let mut seen_h: HashSet<(String, usize)> = holders .iter() - .map(|h| (h.holder.clone(), h.start_line)) + .map(|h| (h.holder.clone(), h.start_line.get())) .collect(); for (idx, line) in content.lines().enumerate() { @@ -6347,8 
+6384,8 @@ fn extract_spdx_filecopyrighttext_c_without_year( { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -6357,8 +6394,8 @@ fn extract_spdx_filecopyrighttext_c_without_year( { holders.push(HolderDetection { holder, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -6389,7 +6426,7 @@ fn extract_html_meta_name_copyright_content( let mut seen_cr: HashSet = copyrights.iter().map(|c| c.copyright.clone()).collect(); let mut seen_h: HashSet<(String, usize)> = holders .iter() - .map(|h| (h.holder.clone(), h.start_line)) + .map(|h| (h.holder.clone(), h.start_line.get())) .collect(); for (idx, line) in content.lines().enumerate() { @@ -6416,8 +6453,8 @@ fn extract_html_meta_name_copyright_content( { copyrights.push(CopyrightDetection { copyright: refined.clone(), - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); if let Some(holder) = derive_holder_from_simple_copyright_string(&refined) @@ -6425,8 +6462,8 @@ fn extract_html_meta_name_copyright_content( { holders.push(HolderDetection { holder, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -6448,7 +6485,7 @@ fn extract_added_the_copyright_year_for_lines( let mut seen_cr: HashSet = copyrights.iter().map(|c| c.copyright.clone()).collect(); let mut seen_h: HashSet<(String, usize)> = holders .iter() - .map(|h| (h.holder.clone(), h.start_line)) + .map(|h| (h.holder.clone(), h.start_line.get())) .collect(); for idx in 0..prepared_cache.len() { @@ -6470,16 +6507,16 @@ fn extract_added_the_copyright_year_for_lines( if seen_cr.insert(cr.clone()) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: 
LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } if seen_h.insert((holder.clone(), ln)) { holders.push(HolderDetection { holder, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -6496,7 +6533,7 @@ fn extract_changelog_timestamp_copyrights_from_content( let mut seen_cr: HashSet = copyrights.iter().map(|c| c.copyright.clone()).collect(); let mut seen_h: HashSet<(String, usize)> = holders .iter() - .map(|h| (h.holder.clone(), h.start_line)) + .map(|h| (h.holder.clone(), h.start_line.get())) .collect(); let mut matches: Vec<(usize, String, String)> = Vec::new(); @@ -6529,8 +6566,8 @@ fn extract_changelog_timestamp_copyrights_from_content( { copyrights.push(CopyrightDetection { copyright: refined, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -6539,8 +6576,8 @@ fn extract_changelog_timestamp_copyrights_from_content( { holders.push(HolderDetection { holder, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -6652,8 +6689,8 @@ fn extract_common_year_only_lines( if seen.insert(refined.clone()) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -6740,8 +6777,8 @@ fn extract_embedded_bare_c_year_suffixes( if seen.insert(cr_lower) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -6791,8 +6828,8 @@ fn extract_trailing_bare_c_year_range_suffixes( { copyrights.push(CopyrightDetection { copyright: cr, - 
start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -6865,8 +6902,8 @@ fn extract_repeated_embedded_bare_c_year_suffixes( { copyrights.push(CopyrightDetection { copyright: refined, - start_line: first_ln, - end_line: first_ln, + start_line: LineNumber::new(first_ln).expect("valid"), + end_line: LineNumber::new(first_ln).expect("valid"), }); } } @@ -6880,8 +6917,8 @@ fn extract_repeated_embedded_bare_c_year_suffixes( { copyrights.push(CopyrightDetection { copyright: refined, - start_line: first_ln, - end_line: first_ln, + start_line: LineNumber::new(first_ln).expect("valid"), + end_line: LineNumber::new(first_ln).expect("valid"), }); } } @@ -6923,16 +6960,16 @@ fn extract_lowercase_username_angle_email_copyrights( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } if seen_holders.insert(user.to_string()) { holders.push(HolderDetection { holder: user.to_string(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -6971,16 +7008,16 @@ fn extract_lowercase_username_paren_email_copyrights( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } if seen_holders.insert(user.to_string()) { holders.push(HolderDetection { holder: user.to_string(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7024,8 +7061,8 @@ fn extract_c_year_range_by_name_comma_email_lines( { 
copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -7034,8 +7071,8 @@ fn extract_c_year_range_by_name_comma_email_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7085,16 +7122,16 @@ fn extract_copyright_years_by_name_paren_email_lines( if seen_copyrights.insert(full.to_ascii_lowercase()) { copyrights.push(CopyrightDetection { copyright: full.clone(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } let year_only_raw = format!("copyright {years}"); if let Some(year_only) = refine_copyright(&year_only_raw) { copyrights.retain(|c| { - !(c.start_line == *ln - && c.end_line == *ln + !(c.start_line.get() == *ln + && c.end_line.get() == *ln && c.copyright == year_only && c.copyright != full) }); @@ -7105,8 +7142,8 @@ fn extract_copyright_years_by_name_paren_email_lines( { holders.push(HolderDetection { holder, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7199,15 +7236,17 @@ fn extract_copyright_years_by_name_then_paren_email_next_line( { copyrights.push(CopyrightDetection { copyright: full, - start_line: ln, - end_line: next_ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(next_ln).expect("valid"), }); } let year_only_raw = format!("copyright {years}"); if let Some(year_only) = refine_copyright(&year_only_raw) { copyrights.retain(|c| { - !(c.start_line == ln && c.end_line == ln && c.copyright == year_only) + !(c.start_line.get() == ln + && 
c.end_line.get() == ln + && c.copyright == year_only) }); } @@ -7216,8 +7255,8 @@ fn extract_copyright_years_by_name_then_paren_email_next_line( { holders.push(HolderDetection { holder, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -7261,8 +7300,8 @@ fn extract_copyright_year_name_with_of_lines( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -7271,8 +7310,8 @@ fn extract_copyright_year_name_with_of_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7385,7 +7424,9 @@ fn extract_standalone_c_holder_year_lines( } let already_covered = copyrights.iter().any(|c| { - c.start_line <= *ln && c.end_line >= *ln && c.copyright.contains(&yearish) + c.start_line.get() <= *ln + && c.end_line.get() >= *ln + && c.copyright.contains(&yearish) }); if already_covered { continue; @@ -7426,16 +7467,16 @@ fn extract_standalone_c_holder_year_lines( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), end_line: if email_suffix.is_some() { group .iter() .skip(idx + 1) .find(|(_, l)| !l.trim().is_empty()) - .map(|(n, _)| *n) - .unwrap_or(*ln) + .map(|(n, _)| LineNumber::new(*n).expect("invalid line number")) + .unwrap_or(LineNumber::new(*ln).expect("invalid line number")) } else { - *ln + LineNumber::new(*ln).expect("invalid line number") }, }); } @@ -7445,8 +7486,8 @@ fn extract_standalone_c_holder_year_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: 
LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7509,8 +7550,8 @@ fn extract_c_holder_without_year_lines( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -7519,8 +7560,8 @@ fn extract_c_holder_without_year_lines( { holders.push(HolderDetection { holder, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7534,11 +7575,11 @@ fn extract_c_years_then_holder_lines( ) { let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for group in groups { @@ -7584,8 +7625,8 @@ fn extract_c_years_then_holder_lines( if seen_cr.insert((*ln, cr.clone())) { copyrights.push(CopyrightDetection { copyright: cr.clone(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -7594,8 +7635,8 @@ fn extract_c_years_then_holder_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7616,11 +7657,11 @@ fn extract_copyright_c_years_holder_lines( let mut seen_c: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, 
h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for group in groups { @@ -7647,8 +7688,8 @@ fn extract_copyright_c_years_holder_lines( if seen_c.insert((*ln, cr.clone())) { copyrights.push(CopyrightDetection { copyright: cr.clone(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -7657,8 +7698,8 @@ fn extract_copyright_c_years_holder_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -7680,11 +7721,11 @@ fn extract_three_digit_copyright_year_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for idx in 0..prepared_cache.len() { @@ -7715,8 +7756,8 @@ fn extract_three_digit_copyright_year_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -7725,8 +7766,8 @@ fn extract_three_digit_copyright_year_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -7747,11 +7788,11 @@ fn extract_copyrighted_by_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - 
.map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for idx in 0..prepared_cache.len() { @@ -7785,8 +7826,8 @@ fn extract_copyrighted_by_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -7795,8 +7836,8 @@ fn extract_copyrighted_by_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -7818,11 +7859,11 @@ fn extract_c_word_year_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for idx in 0..prepared_cache.len() { @@ -7862,8 +7903,8 @@ fn extract_c_word_year_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -7872,8 +7913,8 @@ fn extract_c_word_year_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -7898,11 +7939,11 @@ fn extract_are_c_year_holder_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for ln 
in 1..=prepared_cache.len() { @@ -7938,8 +7979,8 @@ fn extract_are_c_year_holder_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -7948,8 +7989,8 @@ fn extract_are_c_year_holder_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -7970,11 +8011,11 @@ fn extract_bare_c_by_holder_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for ln in 1..=prepared_cache.len() { @@ -7999,8 +8040,8 @@ fn extract_bare_c_by_holder_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } if let Some(h) = refine_holder_in_copyright_context(holder_raw) @@ -8008,8 +8049,8 @@ fn extract_bare_c_by_holder_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -8031,11 +8072,11 @@ fn extract_all_rights_reserved_by_holder_lines( let mut seen_cr: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for ln in 
1..=prepared_cache.len() { @@ -8061,8 +8102,8 @@ fn extract_all_rights_reserved_by_holder_lines( if seen_cr.insert((ln, refined.clone())) { copyrights.push(CopyrightDetection { copyright: refined, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -8071,8 +8112,8 @@ fn extract_all_rights_reserved_by_holder_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -8096,11 +8137,11 @@ fn extract_holder_is_name_paren_email_lines( let mut seen_c: HashSet<(usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.copyright.clone())) .collect(); let mut seen_h: HashSet<(usize, String)> = holders .iter() - .map(|h| (h.start_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.holder.clone())) .collect(); for ln in 1..=prepared_cache.len() { @@ -8124,8 +8165,8 @@ fn extract_holder_is_name_paren_email_lines( if seen_c.insert((ln, cr.clone())) { copyrights.push(CopyrightDetection { copyright: cr, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } @@ -8134,8 +8175,8 @@ fn extract_holder_is_name_paren_email_lines( { holders.push(HolderDetection { holder: h, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -8246,8 +8287,8 @@ fn extract_copr_lines( if seen_copyrights.insert(cr.clone()) { copyrights.push(CopyrightDetection { copyright: cr.clone(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -8276,8 +8317,8 @@ fn extract_copr_lines( if seen_holders.insert(h.clone()) { holders.push(HolderDetection { holder: h, - start_line: *ln, - 
end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -8354,7 +8395,7 @@ fn apply_javadoc_company_metadata( let ln = copy_cap .get(0) - .map(|m| line_number_index.line_number_at_offset(m.start())) + .map(|m| line_number_index.line_number_at_offset(m.start()).get()) .unwrap_or(1); let append_company_value = company_val.split_whitespace().count() >= 2; @@ -8376,16 +8417,16 @@ fn apply_javadoc_company_metadata( if !copyrights.iter().any(|c| c.copyright == desired_copyright) { copyrights.push(CopyrightDetection { copyright: desired_copyright, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } if !holders.iter().any(|h| h.holder == company_holder) { holders.push(HolderDetection { holder: company_holder, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).unwrap(), + end_line: LineNumber::new(ln).unwrap(), }); } } @@ -8515,11 +8556,11 @@ fn normalize_pudn_html_footer_copyrights( }); let mut seen_copyrights: HashSet<(usize, usize, String)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line, c.copyright.clone())) + .map(|c| (c.start_line.get(), c.end_line.get(), c.copyright.clone())) .collect(); let mut seen_holders: HashSet<(usize, usize, String)> = holders .iter() - .map(|h| (h.start_line, h.end_line, h.holder.clone())) + .map(|h| (h.start_line.get(), h.end_line.get(), h.holder.clone())) .collect(); let mut saw_pudn_footer = false; @@ -8553,7 +8594,7 @@ fn normalize_pudn_html_footer_copyrights( let expected_copyright = normalize_whitespace(&format!("(c) {years} pudn.com")); let expected_holder = "pudn.com".to_string(); - let ckey = (ln, ln, expected_copyright.clone()); + let ckey = (ln.get(), ln.get(), expected_copyright.clone()); if seen_copyrights.insert(ckey) { copyrights.push(CopyrightDetection { copyright: expected_copyright.clone(), @@ -8562,7 +8603,7 @@ fn 
normalize_pudn_html_footer_copyrights( }); } - let hkey = (ln, ln, expected_holder.clone()); + let hkey = (ln.get(), ln.get(), expected_holder.clone()); if seen_holders.insert(hkey) { holders.push(HolderDetection { holder: expected_holder, @@ -8683,8 +8724,8 @@ fn fallback_year_only_copyrights(groups: &[Vec<(usize, String)>]) -> Vec]) -> Vec]) -> Vec"); - if let Some(existing) = copyrights - .iter_mut() - .find(|c| c.start_line == *ln && c.end_line == *ln && c.copyright == short) - { + if let Some(existing) = copyrights.iter_mut().find(|c| { + c.start_line.get() == *ln && c.end_line.get() == *ln && c.copyright == short + }) { existing.copyright = full.clone(); } else { copyrights.push(CopyrightDetection { copyright: full.clone(), - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } seen_copyrights.insert(full_lower); @@ -9794,8 +9834,8 @@ fn extract_angle_bracket_year_name_copyrights( if seen_holders.insert(holder_lower) { holders.push(HolderDetection { holder, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -9840,7 +9880,7 @@ fn extract_html_icon_class_copyrights( let ln = cap .get(0) .map(|m| line_number_index.line_number_at_offset(m.start())) - .unwrap_or(1); + .unwrap_or(LineNumber::ONE); let year = cap.name("year").map(|m| m.as_str()).unwrap_or("").trim(); if year.is_empty() { continue; @@ -9871,7 +9911,7 @@ fn extract_html_icon_class_copyrights( let ln = cap .get(0) .map(|m| line_number_index.line_number_at_offset(m.start())) - .unwrap_or(1); + .unwrap_or(LineNumber::ONE); let url = cap.name("url").map(|m| m.as_str()).unwrap_or("").trim(); if url.is_empty() { continue; @@ -9906,7 +9946,7 @@ fn extract_html_icon_class_copyrights( let ln = cap .get(0) .map(|m| line_number_index.line_number_at_offset(m.start())) - 
.unwrap_or(1); + .unwrap_or(LineNumber::ONE); let years = cap.name("years").map(|m| m.as_str()).unwrap_or("").trim(); if years.is_empty() { continue; @@ -9973,8 +10013,8 @@ fn extract_copyright_year_c_name_angle_email_lines( { copyrights.push(CopyrightDetection { copyright: cr, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } @@ -9983,8 +10023,8 @@ fn extract_copyright_year_c_name_angle_email_lines( { holders.push(HolderDetection { holder: h, - start_line: *ln, - end_line: *ln, + start_line: LineNumber::new(*ln).expect("invalid line number"), + end_line: LineNumber::new(*ln).expect("invalid line number"), }); } } @@ -10039,8 +10079,8 @@ fn extract_copyright_by_without_year_lines( let end_line = group.last().map(|(n, _)| *n).unwrap_or(start_line); copyrights.push(CopyrightDetection { copyright: cr, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } @@ -10051,8 +10091,8 @@ fn extract_copyright_by_without_year_lines( let end_line = group.last().map(|(n, _)| *n).unwrap_or(start_line); holders.push(HolderDetection { holder, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } } @@ -10067,13 +10107,13 @@ fn drop_shadowed_and_or_holders(holders: &mut Vec) { let mut by_span: HashMap<(usize, usize), Vec> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .push(h.holder.clone()); } holders.retain(|h| { - let Some(group) = by_span.get(&(h.start_line, h.end_line)) else { + let Some(group) = by_span.get(&(h.start_line.get(), h.end_line.get())) else { return true; }; @@ -10101,13 +10141,13 @@ fn drop_shadowed_prefix_holders(holders: &mut Vec) { let mut by_span: 
HashMap<(usize, usize), Vec> = HashMap::new(); for h in holders.iter() { by_span - .entry((h.start_line, h.end_line)) + .entry((h.start_line.get(), h.end_line.get())) .or_default() .push(h.holder.clone()); } holders.retain(|h| { - let Some(group) = by_span.get(&(h.start_line, h.end_line)) else { + let Some(group) = by_span.get(&(h.start_line.get(), h.end_line.get())) else { return true; }; @@ -10175,13 +10215,13 @@ fn drop_shadowed_prefix_copyrights(copyrights: &mut Vec) { let mut by_span: HashMap<(usize, usize), Vec> = HashMap::new(); for c in copyrights.iter() { by_span - .entry((c.start_line, c.end_line)) + .entry((c.start_line.get(), c.end_line.get())) .or_default() .push(c.copyright.clone()); } copyrights.retain(|c| { - let Some(group) = by_span.get(&(c.start_line, c.end_line)) else { + let Some(group) = by_span.get(&(c.start_line.get(), c.end_line.get())) else { return true; }; let short = c.copyright.as_str(); @@ -10287,7 +10327,7 @@ fn drop_shadowed_bare_c_copyrights_same_span(copyrights: &mut Vec= copy_line || keep_prefix_lines.contains(&t.start_line) + t.start_line >= copy_line || keep_prefix_lines.contains(&t.start_line.get()) }); } strip_trailing_commas(&mut node_holder_leaves); @@ -11173,7 +11217,8 @@ fn extract_from_tree_nodes( let mut node_holder_mini = strip_all_rights_reserved(node_holder_mini); if let Some(copy_line) = copy_line { node_holder_mini.retain(|t| { - t.start_line >= copy_line || keep_prefix_lines.contains(&t.start_line) + t.start_line >= copy_line + || keep_prefix_lines.contains(&t.start_line.get()) }); } strip_trailing_commas(&mut node_holder_mini); @@ -11326,7 +11371,10 @@ fn merge_copyright_with_following_author<'a>( } let cr_leaves_all = collect_all_leaves(copyright_node); - let cr_last_line = cr_leaves_all.last().map(|t| t.start_line).unwrap_or(0); + let cr_last_line = cr_leaves_all + .last() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE); let author_first_line = auth_token.start_line; if author_first_line != cr_last_line 
+ 1 { return None; @@ -11391,7 +11439,7 @@ fn merge_copyright_with_following_author<'a>( fn extract_sectioned_authors_from_author_node(node: &ParseNode) -> Option> { let all_leaves = collect_all_leaves(node); - let mut header_lines: Vec = Vec::new(); + let mut header_lines: Vec = Vec::new(); for t in &all_leaves { let v = t .value @@ -11660,7 +11708,7 @@ fn is_name_continuation(node: &ParseNode) -> bool { } } -fn is_same_line_holder_suffix_prefix(tree: &[ParseNode], idx: usize, line: usize) -> bool { +fn is_same_line_holder_suffix_prefix(tree: &[ParseNode], idx: usize, line: LineNumber) -> bool { let Some(node) = tree.get(idx) else { return false; }; @@ -12256,7 +12304,7 @@ fn collect_trailing_orphan_tokens<'a>( fn collect_following_copyright_clause_tokens( tree: &[ParseNode], start: usize, - line: usize, + line: LineNumber, ) -> (Vec<&Token>, usize) { if start >= tree.len() { return (Vec::new(), 0); @@ -12447,7 +12495,7 @@ const AUTHOR_BY_KEYWORDS: &[&str] = &[ "patches", ]; -fn is_line_initial_keyword(tree: &[ParseNode], idx: usize, keyword_line: usize) -> bool { +fn is_line_initial_keyword(tree: &[ParseNode], idx: usize, keyword_line: LineNumber) -> bool { if idx == 0 { return true; } @@ -12930,7 +12978,7 @@ fn extract_bare_copyrights( copyrights: &mut Vec, holders: &mut Vec, ) { - fn has_line_start_copyright_prefix(tree: &[ParseNode], idx: usize, line: usize) -> bool { + fn has_line_start_copyright_prefix(tree: &[ParseNode], idx: usize, line: LineNumber) -> bool { let mut found_copyright = false; for j in (0..idx).rev() { for t in collect_all_leaves(&tree[j]).iter().rev() { @@ -13667,9 +13715,9 @@ fn apply_written_by_for_markers( for cr in copyrights.iter_mut() { let next_line = cr.end_line.saturating_add(1); - let next_text = group - .iter() - .find_map(|(ln, text)| (*ln == next_line).then_some(text.as_str())); + let next_text = group.iter().find_map(|(ln, text)| { + (LineNumber::new(*ln) == Some(next_line)).then_some(text.as_str()) + }); let Some(next_text) = 
next_text else { continue; @@ -13682,7 +13730,10 @@ fn apply_written_by_for_markers( cr.copyright = format!("{} Written", cr.copyright.trim_end()); } - for h in holders.iter_mut().filter(|h| h.end_line == cr.end_line) { + for h in holders + .iter_mut() + .filter(|h| h.end_line.get() == cr.end_line.get()) + { if !h.holder.ends_with("Written") { h.holder = format!("{} Written", h.holder.trim_end()); } @@ -13711,8 +13762,8 @@ fn restore_bare_holder_angle_emails( } for cr in copyrights.iter().filter(|c| { - h.start_line >= c.start_line - && h.end_line <= c.end_line + h.start_line.get() >= c.start_line.get() + && h.end_line.get() <= c.end_line.get() && !c.copyright.to_ascii_lowercase().contains("copyright") }) { let Some(cap) = LEADING_NAME_EMAIL_RE.captures(cr.copyright.as_str()) else { @@ -13765,7 +13816,9 @@ fn build_holder_from_copyright_node( let leaves = collect_holder_filtered_leaves(node, ignored_labels, ignored_pos_tags); let mut filtered = strip_all_rights_reserved(leaves); if let Some(copy_line) = copy_line { - filtered.retain(|t| t.start_line >= copy_line || keep_prefix_lines.contains(&t.start_line)); + filtered.retain(|t| { + t.start_line >= copy_line || keep_prefix_lines.contains(&t.start_line.get()) + }); } let allow_single_word_contributors = collect_all_leaves(node) @@ -13775,13 +13828,13 @@ fn build_holder_from_copyright_node( build_holder_from_tokens(&filtered, allow_single_word_contributors) } -fn signal_lines_before_copy_line(node: &ParseNode, copy_line: usize) -> HashSet { +fn signal_lines_before_copy_line(node: &ParseNode, copy_line: LineNumber) -> HashSet { use std::collections::HashMap; let mut by_line: HashMap> = HashMap::new(); for t in collect_all_leaves(node) { if t.start_line < copy_line { - by_line.entry(t.start_line).or_default().push(t); + by_line.entry(t.start_line.get()).or_default().push(t); } } @@ -13867,8 +13920,14 @@ fn build_copyright_from_tokens(tokens: &[&Token]) -> Option } Some(CopyrightDetection { copyright: refined, - 
start_line: tokens.first().map(|t| t.start_line).unwrap_or(0), - end_line: tokens.last().map(|t| t.start_line).unwrap_or(0), + start_line: tokens + .first() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), + end_line: tokens + .last() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), }) } @@ -13890,8 +13949,14 @@ fn build_holder_from_tokens( } Some(HolderDetection { holder: refined, - start_line: tokens.first().map(|t| t.start_line).unwrap_or(0), - end_line: tokens.last().map(|t| t.start_line).unwrap_or(0), + start_line: tokens + .first() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), + end_line: tokens + .last() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), }) } @@ -13906,8 +13971,14 @@ fn build_author_from_tokens(tokens: &[&Token]) -> Option { } Some(AuthorDetection { author: refined, - start_line: tokens.first().map(|t| t.start_line).unwrap_or(0), - end_line: tokens.last().map(|t| t.start_line).unwrap_or(0), + start_line: tokens + .first() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), + end_line: tokens + .last() + .map(|t| t.start_line) + .unwrap_or(LineNumber::ONE), }) } @@ -14025,23 +14096,23 @@ fn drop_scan_only_holders_from_copyright_scan_lines( let copyright_spans: HashSet<(usize, usize)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line)) + .map(|c| (c.start_line.get(), c.end_line.get())) .collect(); holders.retain(|holder| { - let span = (holder.start_line, holder.end_line); + let span = (holder.start_line.get(), holder.end_line.get()); if copyright_spans.contains(&span) { return true; } if !holder.holder.eq_ignore_ascii_case("scan") { return true; } - if holder.start_line == 0 || holder.start_line != holder.end_line { + if holder.start_line.get() != holder.end_line.get() { return true; } raw_lines - .get(holder.start_line - 1) + .get(holder.start_line.get() - 1) .is_none_or(|line| !COPYRIGHT_SCAN_RE.is_match(line)) }); } @@ -14073,15 +14144,15 @@ fn drop_path_fragment_holders_from_bare_c_code_lines( let 
copyright_spans: HashSet<(usize, usize)> = copyrights .iter() - .map(|c| (c.start_line, c.end_line)) + .map(|c| (c.start_line.get(), c.end_line.get())) .collect(); holders.retain(|holder| { - let span = (holder.start_line, holder.end_line); + let span = (holder.start_line.get(), holder.end_line.get()); if copyright_spans.contains(&span) { return true; } - if holder.start_line == 0 || holder.start_line != holder.end_line { + if holder.start_line.get() != holder.end_line.get() { return true; } if !is_path_like_code_fragment(&holder.holder) { @@ -14089,7 +14160,7 @@ fn drop_path_fragment_holders_from_bare_c_code_lines( } raw_lines - .get(holder.start_line - 1) + .get(holder.start_line.get() - 1) .is_none_or(|line| !BARE_C_PATH_FRAGMENT_RE.is_match(line)) }); } @@ -14107,7 +14178,7 @@ const YEAR_LIKE_LABELS: &[TreeLabel] = &[TreeLabel::YrRange, TreeLabel::YrAnd]; struct HolderLeafFilterState<'a> { result: Vec<&'a Token>, last_was_year_filtered: bool, - last_filtered_email_or_url_line: Option, + last_filtered_email_or_url_line: Option, last_filtered_email_was_angle_bracket: bool, pending_comma_after_filtered_email_or_url: Option<&'a Token>, last_filtered_was_paren_url: bool, @@ -14247,7 +14318,7 @@ fn filter_holder_tokens_with_state<'a>( ) -> Vec<&'a Token> { let mut result = Vec::new(); let mut last_was_year_filtered = predecessor_was_year_filtered; - let mut last_filtered_email_or_url_line: Option = None; + let mut last_filtered_email_or_url_line: Option = None; let mut last_filtered_email_was_angle_bracket = false; // Track when the last filtered token was a parenthesized URL (Markdown-style // [Name](URL),) so we can drop the immediately following comma unconditionally. 
diff --git a/src/copyright/detector_author_heuristics.rs b/src/copyright/detector_author_heuristics.rs index 8e73f563d..6edb329fd 100644 --- a/src/copyright/detector_author_heuristics.rs +++ b/src/copyright/detector_author_heuristics.rs @@ -8,9 +8,10 @@ use crate::copyright::line_tracking::PreparedLineCache; use crate::copyright::prepare::prepare_text_line; use crate::copyright::refiner::refine_author; use crate::copyright::types::{AuthorDetection, CopyrightDetection, HolderDetection}; +use crate::models::LineNumber; -fn line_number_for_offset(content: &str, offset: usize) -> usize { - content[..offset].bytes().filter(|b| *b == b'\n').count() + 1 +fn line_number_for_offset(content: &str, offset: usize) -> LineNumber { + LineNumber::from_0_indexed(content[..offset].bytes().filter(|b| *b == b'\n').count()) } fn decode_markup_entities(value: &str) -> String { @@ -214,8 +215,8 @@ pub(super) fn extract_multiline_written_by_author_blocks( { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -314,11 +315,11 @@ pub(super) fn extract_multiline_written_by_author_blocks( if let Some(combined) = refine_author(combined_candidate) && seen.insert(combined.clone()) { - authors.retain(|a| a.start_line < start_line || a.end_line > end_line); + authors.retain(|a| a.start_line.get() < start_line || a.end_line.get() > end_line); authors.push(AuthorDetection { author: combined, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); i = j; continue; @@ -341,8 +342,8 @@ pub(super) fn extract_multiline_written_by_author_blocks( { authors.push(AuthorDetection { author, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } extracted_any = true; @@ -368,11 
+369,11 @@ pub(super) fn extract_multiline_written_by_author_blocks( if let Some(combined) = refine_author(combined_candidate) && seen.insert(combined.clone()) { - authors.retain(|a| a.start_line < start_line || a.end_line > end_line); + authors.retain(|a| a.start_line.get() < start_line || a.end_line.get() > end_line); authors.push(AuthorDetection { author: combined, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } } @@ -413,8 +414,8 @@ pub(super) fn extract_json_excerpt_developed_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }); } } @@ -504,8 +505,8 @@ pub(super) fn extract_module_author_macros( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -604,8 +605,8 @@ pub(super) fn extract_was_developed_by_author_blocks( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: end_ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(end_ln).expect("invalid line number"), }); } @@ -781,11 +782,11 @@ pub(super) fn extract_author_colon_blocks( }; if seen.insert(combined.clone()) { - authors.retain(|a| a.start_line < start_line || a.end_line > end_line); + authors.retain(|a| a.start_line.get() < start_line || a.end_line.get() > end_line); authors.push(AuthorDetection { author: combined, - start_line, - end_line, + start_line: LineNumber::new(start_line).expect("valid"), + end_line: LineNumber::new(end_line).expect("valid"), }); } @@ -944,8 +945,8 @@ pub(super) fn extract_code_written_by_author_blocks( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, 
- start_line: ln, - end_line: j, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(j).expect("invalid line number"), }); } @@ -1024,8 +1025,8 @@ pub(super) fn extract_developed_and_created_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author: author.clone(), - start_line: start_idx + 1, - end_line: end_idx + 1, + start_line: LineNumber::from_0_indexed(start_idx), + end_line: LineNumber::from_0_indexed(end_idx), }); } @@ -1064,8 +1065,8 @@ pub(super) fn extract_with_additional_hacking_by_authors( { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1139,17 +1140,20 @@ pub(super) fn merge_metadata_author_and_email_lines( if seen.insert(combined.clone()) { authors.push(AuthorDetection { author: combined, - start_line: author_ln, - end_line: email_ln, + start_line: LineNumber::new(author_ln).expect("invalid line number"), + end_line: LineNumber::new(email_ln).expect("invalid line number"), }); } authors.retain(|a| { - if a.start_line == author_ln && a.end_line == author_ln && a.author == name { + if a.start_line.get() == author_ln + && a.end_line.get() == author_ln + && a.author == name + { return false; } - if a.start_line == email_ln - && a.end_line == email_ln + if a.start_line.get() == email_ln + && a.end_line.get() == email_ln && a.author.to_ascii_lowercase() == format!("author-email {email}") { return false; @@ -1217,8 +1221,8 @@ pub(super) fn extract_debian_maintainer_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1247,8 +1251,8 @@ pub(super) fn extract_created_by_project_author( if seen.insert(author.clone()) { 
authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } break; @@ -1300,8 +1304,8 @@ pub(super) fn extract_created_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author: author.clone(), - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } @@ -1344,11 +1348,11 @@ pub(super) fn extract_written_by_comma_and_copyright_authors( continue; }; if seen.insert(author.clone()) { - authors.retain(|a| !(a.start_line == ln && a.end_line == ln)); + authors.retain(|a| !(a.start_line.get() == ln && a.end_line.get() == ln)); authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1403,8 +1407,8 @@ pub(super) fn extract_package_comment_named_authors( { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1465,8 +1469,8 @@ pub(super) fn extract_developed_by_sentence_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1514,8 +1518,8 @@ pub(super) fn extract_developed_by_phrase_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1559,8 +1563,8 @@ pub(super) fn 
extract_maintained_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1623,8 +1627,8 @@ pub(super) fn extract_converted_to_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author: author.clone(), - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } if add_converted_variant { @@ -1632,8 +1636,8 @@ pub(super) fn extract_converted_to_by_authors( if seen.insert(converted.clone()) { authors.push(AuthorDetection { author: converted, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1679,8 +1683,8 @@ pub(super) fn extract_various_bugfixes_and_enhancements_by_authors( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } @@ -1791,10 +1795,12 @@ pub(super) fn drop_ref_markup_authors(authors: &mut Vec) { pub(super) fn normalize_json_blob_authors(raw_lines: &[&str], authors: &mut Vec) { let mut normalized: Vec = Vec::with_capacity(authors.len()); - let mut seen: HashSet<(usize, usize, String)> = HashSet::new(); + let mut seen: HashSet<(LineNumber, LineNumber, String)> = HashSet::new(); for author in authors.iter() { - let Some(window) = json_author_window(raw_lines, author.start_line, author.end_line) else { + let Some(window) = + json_author_window(raw_lines, author.start_line.get(), author.end_line.get()) + else { let key = (author.start_line, author.end_line, author.author.clone()); if seen.insert(key) { 
normalized.push(author.clone()); @@ -2023,8 +2029,8 @@ pub(super) fn extract_dense_name_email_author_lists( if seen.insert(author.clone()) { authors.push(AuthorDetection { author, - start_line: ln, - end_line: ln, + start_line: LineNumber::new(ln).expect("invalid line number"), + end_line: LineNumber::new(ln).expect("invalid line number"), }); } } diff --git a/src/copyright/detector_test.rs b/src/copyright/detector_test.rs index 44a414623..8a7c38f2a 100644 --- a/src/copyright/detector_test.rs +++ b/src/copyright/detector_test.rs @@ -1,4 +1,5 @@ use super::*; +use crate::models::LineNumber; use std::fs; use std::path::PathBuf; @@ -20,13 +21,13 @@ fn test_drop_shadowed_year_only_prefix_same_start_line() { let mut copyrights = vec![ CopyrightDetection { copyright: "(c) 2001".to_string(), - start_line: 5, - end_line: 5, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(5).unwrap(), }, CopyrightDetection { copyright: "(c) 2001 Foo Bar".to_string(), - start_line: 5, - end_line: 5, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(5).unwrap(), }, ]; drop_shadowed_year_only_copyright_prefixes_same_start_line(&mut copyrights); @@ -735,7 +736,7 @@ fn test_extract_from_tree_nodes_builds_hall_holder_tokens() { let mut debug_lines: Vec = Vec::new(); for (i, node) in tree.iter().enumerate() { let leaves = collect_all_leaves(node); - let line = leaves.first().map(|t| t.start_line).unwrap_or(0); + let line = leaves.first().map(|t| t.start_line.get()).unwrap_or(0); let has_2004 = leaves .iter() .any(|t| t.tag == PosTag::Yr && t.value.starts_with("2004")); @@ -777,10 +778,10 @@ fn test_extract_from_tree_nodes_builds_hall_holder_tokens() { let copy_line = collect_all_leaves(hall_node) .iter() .filter(|t| t.tag == PosTag::Copy && t.value.eq_ignore_ascii_case("copyright")) - .map(|t| t.start_line) + .map(|t| t.start_line.get()) .min(); let keep_prefix_lines = copy_line - .map(|cl| signal_lines_before_copy_line(hall_node, cl)) + .map(|cl| 
signal_lines_before_copy_line(hall_node, LineNumber::new(cl).unwrap())) .unwrap_or_default(); let node_holder_leaves = @@ -788,8 +789,9 @@ fn test_extract_from_tree_nodes_builds_hall_holder_tokens() { let mut holder_tokens: Vec<&Token> = Vec::new(); let mut node_holder_leaves = strip_all_rights_reserved(node_holder_leaves); if let Some(copy_line) = copy_line { - node_holder_leaves - .retain(|t| t.start_line >= copy_line || keep_prefix_lines.contains(&t.start_line)); + node_holder_leaves.retain(|t| { + t.start_line.get() >= copy_line || keep_prefix_lines.contains(&t.start_line.get()) + }); } holder_tokens.extend(node_holder_leaves); holder_tokens.extend(&trailing_tokens); @@ -2034,7 +2036,7 @@ fn test_detect_simple_copyright() { "Should contain year: {}", c[0].copyright ); - assert_eq!(c[0].start_line, 1); + assert_eq!(c[0].start_line, LineNumber::ONE); assert!(!h.is_empty(), "Should detect holder"); } @@ -2231,8 +2233,8 @@ fn test_detect_author() { assert!(h.is_empty(), "Should not detect holder"); assert_eq!(a.len(), 1, "Should detect one author, got: {:?}", a); assert_eq!(a[0].author, "John Doe"); - assert_eq!(a[0].start_line, 1); - assert_eq!(a[0].end_line, 1); + assert_eq!(a[0].start_line, LineNumber::ONE); + assert_eq!(a[0].end_line, LineNumber::ONE); } #[test] @@ -2244,8 +2246,8 @@ fn test_detect_author_from_xml_author_attribute() { assert!(h.is_empty(), "Should not detect holder"); assert_eq!(a.len(), 1, "Should detect one author, got: {:?}", a); assert_eq!(a[0].author, "Vinnie Falco"); - assert_eq!(a[0].start_line, 1); - assert_eq!(a[0].end_line, 1); + assert_eq!(a[0].start_line, LineNumber::ONE); + assert_eq!(a[0].end_line, LineNumber::ONE); } #[test] @@ -2337,7 +2339,11 @@ fn test_detect_line_numbers() { let text = "Some header\nCopyright 2024 Acme Inc.\nSome footer"; let (c, _h, _a) = detect_copyrights_from_text(text); assert!(!c.is_empty(), "Should detect copyright"); - assert_eq!(c[0].start_line, 2, "Copyright should be on line 2"); + assert_eq!( + 
c[0].start_line, + LineNumber::new(2).unwrap(), + "Copyright should be on line 2" + ); } #[test] @@ -2345,11 +2351,11 @@ fn test_detect_copyright_year_range() { let (c, h, _a) = detect_copyrights_from_text("Copyright 2020-2024 Foo Corp."); assert_eq!(c.len(), 1, "Should detect one copyright, got: {:?}", c); assert_eq!(c[0].copyright, "Copyright 2020-2024 Foo Corp."); - assert_eq!(c[0].start_line, 1); - assert_eq!(c[0].end_line, 1); + assert_eq!(c[0].start_line, LineNumber::ONE); + assert_eq!(c[0].end_line, LineNumber::ONE); assert_eq!(h.len(), 1, "Should detect one holder, got: {:?}", h); assert_eq!(h[0].holder, "Foo Corp."); - assert_eq!(h[0].start_line, 1); + assert_eq!(h[0].start_line, LineNumber::ONE); } #[test] @@ -2710,12 +2716,12 @@ fn test_detect_copyright_and_author_same_text() { let (c, h, a) = detect_copyrights_from_text(text); assert_eq!(c.len(), 1, "Should detect one copyright, got: {:?}", c); assert_eq!(c[0].copyright, "Copyright 2024 Acme Inc."); - assert_eq!(c[0].start_line, 1); + assert_eq!(c[0].start_line, LineNumber::ONE); assert_eq!(h.len(), 1, "Should detect one holder, got: {:?}", h); assert_eq!(h[0].holder, "Acme Inc."); assert_eq!(a.len(), 1, "Should detect one author, got: {:?}", a); assert_eq!(a[0].author, "Jane Smith"); - assert_eq!(a[0].start_line, 5); + assert_eq!(a[0].start_line, LineNumber::new(5).unwrap()); } #[test] @@ -2723,8 +2729,8 @@ fn test_detect_author_written_by() { let (_c, _h, a) = detect_copyrights_from_text("Written by Jane Smith"); assert_eq!(a.len(), 1, "Should detect one author, got: {:?}", a); assert_eq!(a[0].author, "Jane Smith"); - assert_eq!(a[0].start_line, 1); - assert_eq!(a[0].end_line, 1); + assert_eq!(a[0].start_line, LineNumber::ONE); + assert_eq!(a[0].end_line, LineNumber::ONE); } #[test] @@ -2732,8 +2738,8 @@ fn test_detect_author_maintained_by() { let (_c, _h, a) = detect_copyrights_from_text("Maintained by Bob Jones"); assert_eq!(a.len(), 1, "Should detect one author, got: {:?}", a); 
assert_eq!(a[0].author, "Bob Jones"); - assert_eq!(a[0].start_line, 1); - assert_eq!(a[0].end_line, 1); + assert_eq!(a[0].start_line, LineNumber::ONE); + assert_eq!(a[0].end_line, LineNumber::ONE); } #[test] @@ -2785,10 +2791,10 @@ fn test_detect_copyright_with_company() { let (c, h, _a) = detect_copyrights_from_text("Copyright (c) 2024 Google LLC"); assert_eq!(c.len(), 1, "Should detect one copyright, got: {:?}", c); assert_eq!(c[0].copyright, "Copyright (c) 2024 Google LLC"); - assert_eq!(c[0].start_line, 1); + assert_eq!(c[0].start_line, LineNumber::ONE); assert_eq!(h.len(), 1, "Should detect one holder, got: {:?}", h); assert_eq!(h[0].holder, "Google LLC"); - assert_eq!(h[0].start_line, 1); + assert_eq!(h[0].start_line, LineNumber::ONE); } #[test] @@ -2799,10 +2805,10 @@ fn test_detect_copyright_all_rights_reserved() { c[0].copyright, "Copyright 2024 Apple Inc.", "All rights reserved should be stripped from copyright text" ); - assert_eq!(c[0].start_line, 1); + assert_eq!(c[0].start_line, LineNumber::ONE); assert_eq!(h.len(), 1, "Should detect one holder, got: {:?}", h); assert_eq!(h[0].holder, "Apple Inc."); - assert_eq!(h[0].start_line, 1); + assert_eq!(h[0].start_line, LineNumber::ONE); } // ── strip_all_rights_reserved ──────────────────────────────────── @@ -2813,32 +2819,32 @@ fn test_strip_all_rights_reserved_basic() { Token { value: "Copyright".to_string(), tag: PosTag::Copy, - start_line: 1, + start_line: LineNumber::ONE, }, Token { value: "2024".to_string(), tag: PosTag::Yr, - start_line: 1, + start_line: LineNumber::ONE, }, Token { value: "Acme".to_string(), tag: PosTag::Nnp, - start_line: 1, + start_line: LineNumber::ONE, }, Token { value: "All".to_string(), tag: PosTag::Nn, - start_line: 1, + start_line: LineNumber::ONE, }, Token { value: "Rights".to_string(), tag: PosTag::Right, - start_line: 1, + start_line: LineNumber::ONE, }, Token { value: "Reserved".to_string(), tag: PosTag::Reserved, - start_line: 1, + start_line: LineNumber::ONE, }, ]; let 
refs: Vec<&Token> = tokens.iter().collect(); @@ -2859,17 +2865,17 @@ fn test_collect_filtered_leaves_filters_pos_tags() { ParseNode::Leaf(Token { value: "Copyright".to_string(), tag: PosTag::Copy, - start_line: 1, + start_line: LineNumber::ONE, }), ParseNode::Leaf(Token { value: "2024".to_string(), tag: PosTag::Yr, - start_line: 1, + start_line: LineNumber::ONE, }), ParseNode::Leaf(Token { value: "Acme".to_string(), tag: PosTag::Nnp, - start_line: 1, + start_line: LineNumber::ONE, }), ], }; @@ -2887,20 +2893,20 @@ fn test_collect_filtered_leaves_filters_tree_labels() { ParseNode::Leaf(Token { value: "Copyright".to_string(), tag: PosTag::Copy, - start_line: 1, + start_line: LineNumber::ONE, }), ParseNode::Tree { label: TreeLabel::YrRange, children: vec![ParseNode::Leaf(Token { value: "2024".to_string(), tag: PosTag::Yr, - start_line: 1, + start_line: LineNumber::ONE, })], }, ParseNode::Leaf(Token { value: "Acme".to_string(), tag: PosTag::Nnp, - start_line: 1, + start_line: LineNumber::ONE, }), ], }; @@ -3067,23 +3073,23 @@ fn test_drop_shadowed_c_sign_variants_unit() { let mut c = vec![ CopyrightDetection { copyright: "Copyright 2007, 2010 Linux Foundation".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, CopyrightDetection { copyright: "Copyright (c) 2007, 2010 Linux Foundation".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, CopyrightDetection { copyright: "Copyright 1995-2010 Jean-loup Gailly and Mark Adler".to_string(), - start_line: 10, - end_line: 10, + start_line: LineNumber::new(10).unwrap(), + end_line: LineNumber::new(10).unwrap(), }, CopyrightDetection { copyright: "Copyright (c) 1995-2010 Jean-loup Gailly and Mark Adler".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, ]; drop_shadowed_c_sign_variants(&mut c); @@ -3405,13 +3411,21 @@ fn 
test_html_anchor_copyright_url_multiline_span_preserved() { .iter() .find(|cr| cr.copyright == "copyright https://example.com/path") .unwrap(); - assert_eq!((cd.start_line, cd.end_line), (1, 3), "copyrights: {c:?}"); + assert_eq!( + (cd.start_line, cd.end_line), + (LineNumber::new(1).unwrap(), LineNumber::new(3).unwrap()), + "copyrights: {c:?}" + ); let hd = h .iter() .find(|hr| hr.holder == "https://example.com/path") .unwrap(); - assert_eq!((hd.start_line, hd.end_line), (1, 3), "holders: {h:?}"); + assert_eq!( + (hd.start_line, hd.end_line), + (LineNumber::new(1).unwrap(), LineNumber::new(3).unwrap()), + "holders: {h:?}" + ); } #[test] @@ -4135,8 +4149,8 @@ fn test_boost_style_multiline_holder_continuation_after_year_first_line() { assert!( copyrights.iter().any(|c| { - c.start_line == 1 - && c.end_line == 2 + c.start_line == LineNumber::ONE + && c.end_line == LineNumber::new(2).unwrap() && c.copyright.contains("Peter Dimov") && c.copyright.contains("Vinnie Falco") }), @@ -4145,8 +4159,8 @@ fn test_boost_style_multiline_holder_continuation_after_year_first_line() { assert!( holders.iter().any(|h| { - h.start_line == 1 - && h.end_line == 2 + h.start_line == LineNumber::ONE + && h.end_line == LineNumber::new(2).unwrap() && h.holder.contains("Peter Dimov") && h.holder.contains("Vinnie Falco") }), @@ -4164,8 +4178,8 @@ Jean-Luc.Richier@imag.fr, IMAG-LSR.\n"; assert!( copyrights.iter().any(|c| { - c.start_line == 1 - && c.end_line == 1 + c.start_line == LineNumber::ONE + && c.end_line == LineNumber::ONE && c.copyright == "Copyright (c) 1995, 1996, 1997 Francis.Dupont@inria.fr, INRIA" }), "copyrights: {copyrights:?}" @@ -4193,7 +4207,7 @@ fn test_extend_copyright_with_following_all_rights_reserved_line() { assert!( copyrights .iter() - .any(|c| c.start_line == 1 && c.end_line == 2), + .any(|c| c.start_line == LineNumber::ONE && c.end_line == LineNumber::new(2).unwrap()), "copyrights: {copyrights:?}" ); assert!( diff --git a/src/copyright/lexer.rs 
b/src/copyright/lexer.rs index 6cbf0143a..e6d1d2b9c 100644 --- a/src/copyright/lexer.rs +++ b/src/copyright/lexer.rs @@ -12,6 +12,7 @@ use regex::Regex; use super::patterns::COMPILED_PATTERNS; use super::types::{PosTag, Token}; +use crate::models::LineNumber; /// Splitter regex: splits on tabs, spaces, equals signs, and semicolons. /// Matches Python's `re.compile(r'[\t =;]+').split`. @@ -47,7 +48,7 @@ pub fn get_tokens(numbered_lines: &[(usize, String)]) -> Vec { tokens.push(Token { value: "\n".to_string(), tag: PosTag::EmptyLine, - start_line: *start_line, + start_line: LineNumber::new(*start_line).expect("invalid line number"), }); last_line.clear(); continue; @@ -78,12 +79,12 @@ pub fn get_tokens(numbered_lines: &[(usize, String)]) -> Vec { tokens.push(Token { value: base.to_string(), tag, - start_line: *start_line, + start_line: LineNumber::new(*start_line).expect("invalid line number"), }); tokens.push(Token { value: ",".to_string(), tag: PosTag::Cc, - start_line: *start_line, + start_line: LineNumber::new(*start_line).expect("invalid line number"), }); continue; } @@ -94,7 +95,7 @@ pub fn get_tokens(numbered_lines: &[(usize, String)]) -> Vec { tokens.push(Token { value: tok, tag, - start_line: *start_line, + start_line: LineNumber::new(*start_line).expect("invalid line number"), }); } } diff --git a/src/copyright/lexer_test.rs b/src/copyright/lexer_test.rs index 4c61e258f..e9ea62f8f 100644 --- a/src/copyright/lexer_test.rs +++ b/src/copyright/lexer_test.rs @@ -1,4 +1,5 @@ use super::*; +use crate::models::LineNumber; #[test] fn test_simple_copyright_line() { @@ -8,7 +9,7 @@ fn test_simple_copyright_line() { assert_eq!(tokens[0].value, "Copyright"); assert_eq!(tokens[0].tag, PosTag::Copy); - assert_eq!(tokens[0].start_line, 1); + assert_eq!(tokens[0].start_line, LineNumber::ONE); assert_eq!(tokens[1].value, "2024"); assert_eq!(tokens[1].tag, PosTag::Yr); @@ -109,10 +110,10 @@ fn test_line_numbers_preserved() { (11, "Acme Inc.".to_string()), ]; let tokens = 
get_tokens(&lines); - assert_eq!(tokens[0].start_line, 10); + assert_eq!(tokens[0].start_line, LineNumber::new(10).unwrap()); // "Acme" should be on line 11 let acme = tokens.iter().find(|t| t.value == "Acme").unwrap(); - assert_eq!(acme.start_line, 11); + assert_eq!(acme.start_line, LineNumber::new(11).unwrap()); } #[test] diff --git a/src/copyright/line_tracking.rs b/src/copyright/line_tracking.rs index 34567f5d2..80fbe3e39 100644 --- a/src/copyright/line_tracking.rs +++ b/src/copyright/line_tracking.rs @@ -1,4 +1,5 @@ use super::prepare::prepare_text_line; +use crate::models::LineNumber; pub(super) struct PreparedLineCache<'a> { raw_lines: &'a [&'a str], @@ -78,10 +79,11 @@ impl LineNumberIndex { } } - pub(super) fn line_number_at_offset(&self, byte_offset: usize) -> usize { + pub(super) fn line_number_at_offset(&self, byte_offset: usize) -> LineNumber { let offset = byte_offset.min(self.content_len); - 1 + self - .newline_offsets - .partition_point(|&line_break| line_break < offset) + LineNumber::from_0_indexed( + self.newline_offsets + .partition_point(|&line_break| line_break < offset), + ) } } diff --git a/src/copyright/parser.rs b/src/copyright/parser.rs index 7b722ca67..036f221f3 100644 --- a/src/copyright/parser.rs +++ b/src/copyright/parser.rs @@ -8,15 +8,16 @@ use std::time::Instant; use super::grammar::{GRAMMAR_RULES, GrammarRule, TagMatcher}; use super::types::{ParseNode, Token}; +use crate::models::LineNumber; -fn first_line(node: &ParseNode) -> Option { +fn first_line(node: &ParseNode) -> Option { match node { ParseNode::Leaf(t) => Some(t.start_line), ParseNode::Tree { children, .. } => children.iter().filter_map(first_line).min(), } } -fn last_line(node: &ParseNode) -> Option { +fn last_line(node: &ParseNode) -> Option { match node { ParseNode::Leaf(t) => Some(t.start_line), ParseNode::Tree { children, .. 
} => children.iter().filter_map(last_line).max(), diff --git a/src/copyright/parser_test.rs b/src/copyright/parser_test.rs index 394ea74b5..a679b96b8 100644 --- a/src/copyright/parser_test.rs +++ b/src/copyright/parser_test.rs @@ -1,11 +1,12 @@ use super::*; use crate::copyright::types::{PosTag, TreeLabel}; +use crate::models::LineNumber; fn make_token(value: &str, tag: PosTag, line: usize) -> Token { Token { value: value.to_string(), tag, - start_line: line, + start_line: LineNumber::new(line).unwrap(), } } diff --git a/src/copyright/types.rs b/src/copyright/types.rs index 06db17cc2..8ab7ffa35 100644 --- a/src/copyright/types.rs +++ b/src/copyright/types.rs @@ -8,37 +8,27 @@ use serde::Serialize; -/// A detected copyright statement with source location. +use crate::models::LineNumber; + #[derive(Debug, Clone, PartialEq, Serialize)] pub struct CopyrightDetection { - /// The full copyright text (e.g., "Copyright 2024 Acme Inc."). pub copyright: String, - /// 1-based line number where this detection starts. - pub start_line: usize, - /// 1-based line number where this detection ends. - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } -/// A detected copyright holder name with source location. #[derive(Debug, Clone, PartialEq, Serialize)] pub struct HolderDetection { - /// The holder name (e.g., "Acme Inc."). pub holder: String, - /// 1-based line number where this detection starts. - pub start_line: usize, - /// 1-based line number where this detection ends. - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } -/// A detected author name with source location. #[derive(Debug, Clone, PartialEq, Serialize)] pub struct AuthorDetection { - /// The author name (e.g., "John Doe"). pub author: String, - /// 1-based line number where this detection starts. - pub start_line: usize, - /// 1-based line number where this detection ends. 
- pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } /// Part-of-Speech tag for a token (type-safe, not stringly-typed) @@ -117,15 +107,11 @@ pub enum PosTag { Day, // Day of week } -/// A token with its POS tag and source location. #[derive(Debug, Clone)] pub struct Token { - /// The token text (e.g., "Copyright", "2024", "Acme"). pub value: String, - /// The assigned POS tag. pub tag: PosTag, - /// 1-based source line number. - pub start_line: usize, + pub start_line: LineNumber, } /// A node in the parse tree @@ -185,8 +171,8 @@ mod tests { fn test_copyright_detection_creation() { let d = CopyrightDetection { copyright: "Copyright 2024 Acme Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }; assert_eq!(d.copyright, "Copyright 2024 Acme Inc."); } @@ -196,7 +182,7 @@ mod tests { let t = Token { value: "Copyright".to_string(), tag: PosTag::Copy, - start_line: 1, + start_line: LineNumber::ONE, }; assert_eq!(t.tag, PosTag::Copy); } @@ -206,7 +192,7 @@ mod tests { let node = ParseNode::Leaf(Token { value: "2024".to_string(), tag: PosTag::Yr, - start_line: 5, + start_line: LineNumber::new(5).unwrap(), }); assert_eq!(node.tag(), Some(PosTag::Yr)); assert_eq!(node.label(), None); @@ -217,7 +203,7 @@ mod tests { let child = ParseNode::Leaf(Token { value: "2024".to_string(), tag: PosTag::Yr, - start_line: 3, + start_line: LineNumber::new(3).unwrap(), }); let tree = ParseNode::Tree { label: TreeLabel::YrRange, diff --git a/src/finder/emails.rs b/src/finder/emails.rs index 9eb220335..05c4bf3e3 100644 --- a/src/finder/emails.rs +++ b/src/finder/emails.rs @@ -1,6 +1,8 @@ use regex::Regex; use std::sync::LazyLock; +use crate::models::LineNumber; + use super::DetectionConfig; use super::host::is_good_email_domain; use super::junk_data::classify_email; @@ -8,8 +10,8 @@ use super::junk_data::classify_email; #[derive(Debug, Clone, PartialEq)] pub struct EmailDetection { pub email: String, - 
pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } static EMAILS_REGEX: LazyLock = LazyLock::new(|| { @@ -20,7 +22,7 @@ pub fn find_emails(text: &str, config: &DetectionConfig) -> Vec let mut detections = Vec::new(); for (line_index, line) in text.lines().enumerate() { - let line_number = line_index + 1; + let line_number = LineNumber::from_0_indexed(line_index); for matched in EMAILS_REGEX.find_iter(line) { let email = matched.as_str().to_lowercase(); if !is_good_email_domain(&email) { diff --git a/src/finder/golden_test.rs b/src/finder/golden_test.rs index 29f6f23b7..30be4d416 100644 --- a/src/finder/golden_test.rs +++ b/src/finder/golden_test.rs @@ -59,8 +59,8 @@ mod tests { .into_iter() .map(|d| ExpectedEmail { email: d.email, - start_line: d.start_line, - end_line: d.end_line, + start_line: d.start_line.get(), + end_line: d.end_line.get(), }) .collect() } @@ -70,8 +70,8 @@ mod tests { .into_iter() .map(|d| ExpectedUrl { url: d.url, - start_line: d.start_line, - end_line: d.end_line, + start_line: d.start_line.get(), + end_line: d.end_line.get(), }) .collect() } diff --git a/src/finder/mod.rs b/src/finder/mod.rs index 5ab9fcefe..8203985e0 100644 --- a/src/finder/mod.rs +++ b/src/finder/mod.rs @@ -28,6 +28,7 @@ impl Default for DetectionConfig { #[cfg(test)] mod tests { use super::{DetectionConfig, find_emails, find_urls}; + use crate::models::LineNumber; #[test] fn test_find_emails_threshold() { @@ -39,7 +40,7 @@ mod tests { let emails = find_emails(text, &config); assert_eq!(emails.len(), 2); assert_eq!(emails[0].email, "a@b.com"); - assert_eq!(emails[0].start_line, 1); + assert_eq!(emails[0].start_line, LineNumber::ONE); } #[test] diff --git a/src/finder/urls.rs b/src/finder/urls.rs index 1b24e3b2a..976d274d6 100644 --- a/src/finder/urls.rs +++ b/src/finder/urls.rs @@ -3,6 +3,8 @@ use std::sync::LazyLock; use url::Url; +use crate::models::LineNumber; + use super::DetectionConfig; use 
super::host::is_good_url_host_domain; use super::junk_data::classify_url; @@ -10,8 +12,8 @@ use super::junk_data::classify_url; #[derive(Debug, Clone, PartialEq)] pub struct UrlDetection { pub url: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } static URLS_REGEX: LazyLock = LazyLock::new(|| { @@ -114,7 +116,7 @@ pub fn find_urls(text: &str, config: &DetectionConfig) -> Vec { let mut detections = Vec::new(); for (line_index, line) in text.lines().enumerate() { - let line_number = line_index + 1; + let line_number = LineNumber::from_0_indexed(line_index); let normalized_line = line.replace("\\r\\n", "\\n").replace("\\r", "\\n"); for segment in normalized_line.split("\\n") { diff --git a/src/license_detection/aho_match.rs b/src/license_detection/aho_match.rs index 76b773ac6..294ef4a3a 100644 --- a/src/license_detection/aho_match.rs +++ b/src/license_detection/aho_match.rs @@ -13,6 +13,7 @@ use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind}; use crate::license_detection::position_set::PositionSet; use crate::license_detection::query::QueryRun; +use crate::models::LineNumber; pub const MATCH_AHO: MatcherKind = MatcherKind::Aho; @@ -130,12 +131,15 @@ pub fn aho_match_with_extra_matchables( 100.0 }; - let start_line = query_run.line_for_pos(qstart).unwrap_or(1); + let start_line = query_run + .line_for_pos(qstart) + .and_then(LineNumber::new) + .unwrap_or(LineNumber::ONE); let end_line = if qend > qstart { - // qend is exclusive, so the last matched token is at qend-1 query_run .line_for_pos(qend.saturating_sub(1)) + .and_then(LineNumber::new) .unwrap_or(start_line) } else { start_line @@ -604,8 +608,8 @@ mod tests { let matches = aho_match(run.get_index(), &run); assert_eq!(matches.len(), 1); - assert_eq!(matches[0].start_line, 5); - assert_eq!(matches[0].end_line, 5); + assert_eq!(matches[0].start_line, 
LineNumber::new(5).expect("valid")); + assert_eq!(matches[0].end_line, LineNumber::new(5).expect("valid")); } #[test] diff --git a/src/license_detection/detection/analysis.rs b/src/license_detection/detection/analysis.rs index 92ad7a54b..7e3ff6204 100644 --- a/src/license_detection/detection/analysis.rs +++ b/src/license_detection/detection/analysis.rs @@ -90,7 +90,12 @@ pub(super) fn is_false_positive(matches: &[LicenseMatch]) -> bool { return false; } - let start_line = matches.iter().map(|m| m.start_line).min().unwrap_or(0); + let start_line = matches + .iter() + .map(|m| m.start_line) + .min() + .map(|ln| ln.get()) + .unwrap_or(0); let bare_rules = ["gpl_bare", "freeware_bare", "public-domain_bare"]; let is_bare_rule = matches.iter().all(|m| { @@ -533,8 +538,18 @@ fn has_alternative_license_notice(matches: &[LicenseMatch], source_text: Option< return false; }; - let start_line = matches.iter().map(|m| m.start_line).min().unwrap_or(0); - let end_line = matches.iter().map(|m| m.end_line).max().unwrap_or(0); + let start_line = matches + .iter() + .map(|m| m.start_line) + .min() + .map(|ln| ln.get()) + .unwrap_or(0); + let end_line = matches + .iter() + .map(|m| m.end_line) + .max() + .map(|ln| ln.get()) + .unwrap_or(0); if start_line == 0 || end_line < start_line { return false; } @@ -597,6 +612,7 @@ mod tests { use crate::license_detection::models::{ LicenseMatch, MatchCoordinates, MatcherKind, PositionSpan, }; + use crate::models::LineNumber; fn create_test_match(coverage: f32, rule_identifier: &str) -> LicenseMatch { LicenseMatch { @@ -604,8 +620,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("test.txt".to_string()), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).expect("valid line number"), start_token: 1, end_token: 11, matcher: crate::license_detection::models::MatcherKind::Hash, @@ -640,6 +656,8 @@ mod tests { rule_relevance: u8, 
rule_identifier: &str, ) -> LicenseMatch { + let start_line = LineNumber::new(start_line).expect("valid start_line"); + let end_line = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid: 0, license_expression: license_expression.to_string(), @@ -647,8 +665,8 @@ mod tests { from_file: Some("test.txt".to_string()), start_line, end_line, - start_token: start_line, - end_token: end_line + 1, + start_token: start_line.get(), + end_token: end_line.get() + 1, matcher: matcher.parse().expect("invalid test matcher"), score, matched_length, @@ -663,8 +681,8 @@ mod tests { is_from_license: false, rule_start_token: 0, coordinates: MatchCoordinates::query_region(PositionSpan::range( - start_line, - end_line + 1, + start_line.get(), + end_line.get() + 1, )), candidate_resemblance: 0.0, candidate_containment: 0.0, diff --git a/src/license_detection/detection/grouping.rs b/src/license_detection/detection/grouping.rs index 203288db9..f68b57162 100644 --- a/src/license_detection/detection/grouping.rs +++ b/src/license_detection/detection/grouping.rs @@ -78,7 +78,7 @@ pub(super) fn should_group_together( cur: &LicenseMatch, threshold: usize, ) -> bool { - let line_gap = cur.start_line.saturating_sub(prev.end_line); + let line_gap = cur.start_line.get().saturating_sub(prev.end_line.get()); line_gap <= threshold } @@ -132,6 +132,7 @@ pub(super) fn is_correct_detection(matches: &[LicenseMatch]) -> bool { mod tests { use super::*; use crate::license_detection::models::{LicenseMatch, MatchCoordinates, PositionSpan}; + use crate::models::LineNumber; fn create_test_match( start_line: usize, @@ -139,13 +140,15 @@ mod tests { matcher: &str, rule_identifier: &str, ) -> LicenseMatch { + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid: 0, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: 
Some("test.txt".to_string()), - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token: start_line, end_token: end_line + 1, matcher: matcher.parse().expect("invalid test matcher"), @@ -176,13 +179,15 @@ mod tests { start_token: usize, end_token: usize, ) -> LicenseMatch { + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid: 0, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("test.txt".to_string()), - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token, end_token, matcher: crate::license_detection::models::MatcherKind::Hash, diff --git a/src/license_detection/detection/identifier.rs b/src/license_detection/detection/identifier.rs index 6112e3f99..62bbef9a4 100644 --- a/src/license_detection/detection/identifier.rs +++ b/src/license_detection/detection/identifier.rs @@ -162,6 +162,7 @@ pub(super) fn compute_detection_coverage(matches: &[LicenseMatch]) -> f32 { mod tests { use super::*; use crate::license_detection::models::{LicenseMatch, MatchCoordinates, PositionSpan}; + use crate::models::LineNumber; fn create_test_match() -> LicenseMatch { LicenseMatch { @@ -169,8 +170,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("test.txt".to_string()), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).expect("valid line number"), start_token: 1, end_token: 11, matcher: crate::license_detection::models::MatcherKind::Hash, diff --git a/src/license_detection/detection/mod.rs b/src/license_detection/detection/mod.rs index a5565295e..0ef0182b4 100644 --- a/src/license_detection/detection/mod.rs +++ b/src/license_detection/detection/mod.rs @@ -234,7 +234,11 @@ pub(crate) fn get_unique_detections(detections: 
&[LicenseDetection]) -> Vec) -> Vec) -> Vec { detections.sort_by(|a, b| { - let min_line_a = a.matches.iter().map(|m| m.start_line).min().unwrap_or(0); - let min_line_b = b.matches.iter().map(|m| m.start_line).min().unwrap_or(0); + let min_line_a = a + .matches + .iter() + .map(|m| m.start_line) + .min() + .map(|ln| ln.get()) + .unwrap_or(0); + let min_line_b = b + .matches + .iter() + .map(|m| m.start_line) + .min() + .map(|ln| ln.get()) + .unwrap_or(0); min_line_a .cmp(&min_line_b) .then_with(|| a.identifier.cmp(&b.identifier)) @@ -568,6 +584,7 @@ mod tests { use super::*; use crate::license_detection::models::{License, LicenseMatch, MatchCoordinates, PositionSpan}; use crate::license_detection::spdx_mapping::build_spdx_mapping; + use crate::models::LineNumber; fn create_test_match( start_line: usize, @@ -575,13 +592,15 @@ mod tests { matcher: &str, rule_identifier: &str, ) -> LicenseMatch { + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid: 0, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("test.txt".to_string()), - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token: start_line, end_token: end_line + 1, matcher: matcher.parse().expect("invalid test matcher"), @@ -1387,8 +1406,11 @@ mod tests { file_regions: Vec::new(), }; let sorted = sort_detections_by_line(vec![d1, d2]); - assert_eq!(sorted[0].matches[0].start_line, 1); - assert_eq!(sorted[1].matches[0].start_line, 20); + assert_eq!(sorted[0].matches[0].start_line, LineNumber::ONE); + assert_eq!( + sorted[1].matches[0].start_line, + LineNumber::new(20).expect("valid") + ); } #[test] @@ -1532,8 +1554,14 @@ mod tests { ); assert_eq!(detections[0].file_regions.len(), 1); assert_eq!(detections[0].file_regions[0].path, "src/lib.rs"); - assert_eq!(detections[0].file_regions[0].start_line, 4); - 
assert_eq!(detections[0].file_regions[0].end_line, 8); + assert_eq!( + detections[0].file_regions[0].start_line, + LineNumber::new(4).expect("valid") + ); + assert_eq!( + detections[0].file_regions[0].end_line, + LineNumber::new(8).expect("valid") + ); } #[test] @@ -1555,8 +1583,14 @@ mod tests { assert_eq!(detections[0].file_regions.len(), 1); assert_eq!(detections[0].file_regions[0].path, "src/lib.rs"); - assert_eq!(detections[0].file_regions[0].start_line, 4); - assert_eq!(detections[0].file_regions[0].end_line, 25); + assert_eq!( + detections[0].file_regions[0].start_line, + LineNumber::new(4).expect("valid") + ); + assert_eq!( + detections[0].file_regions[0].end_line, + LineNumber::new(25).expect("valid") + ); } #[test] @@ -1569,8 +1603,8 @@ mod tests { identifier: Some("mit-shared".to_string()), file_regions: vec![FileRegion { path: "src/one.rs".to_string(), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).expect("valid"), }], }; let second = LicenseDetection { @@ -1581,8 +1615,8 @@ mod tests { identifier: Some("mit-shared".to_string()), file_regions: vec![FileRegion { path: "src/two.rs".to_string(), - start_line: 20, - end_line: 30, + start_line: LineNumber::new(20).expect("valid"), + end_line: LineNumber::new(30).expect("valid"), }], }; let third = LicenseDetection { @@ -1593,8 +1627,8 @@ mod tests { identifier: Some("mit-shared".to_string()), file_regions: vec![FileRegion { path: "src/two.rs".to_string(), - start_line: 20, - end_line: 30, + start_line: LineNumber::new(20).expect("valid"), + end_line: LineNumber::new(30).expect("valid"), }], }; @@ -1615,8 +1649,8 @@ mod tests { identifier: None, file_regions: vec![FileRegion { path: "src/one.rs".to_string(), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).expect("valid"), }], }; @@ -1635,8 +1669,8 @@ mod tests { identifier: Some("mit-shared".to_string()), file_regions: vec![FileRegion { path: 
"src/one.rs".to_string(), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).expect("valid"), }], }; let second = LicenseDetection { @@ -1647,8 +1681,8 @@ mod tests { identifier: Some("mit-shared".to_string()), file_regions: vec![FileRegion { path: "src/two.rs".to_string(), - start_line: 20, - end_line: 30, + start_line: LineNumber::new(20).expect("valid"), + end_line: LineNumber::new(30).expect("valid"), }], }; diff --git a/src/license_detection/detection/types.rs b/src/license_detection/detection/types.rs index 324bf8d61..e518f2b9a 100644 --- a/src/license_detection/detection/types.rs +++ b/src/license_detection/detection/types.rs @@ -1,12 +1,13 @@ //! Core detection data structures. use crate::license_detection::models::LicenseMatch; +use crate::models::LineNumber; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct FileRegion { pub(crate) path: String, - pub(crate) start_line: usize, - pub(crate) end_line: usize, + pub(crate) start_line: LineNumber, + pub(crate) end_line: LineNumber, } #[derive(Debug, Clone)] @@ -54,15 +55,18 @@ pub struct LicenseDetection { mod tests { use super::*; use crate::license_detection::models::{MatchCoordinates, PositionSpan}; + use crate::models::LineNumber; fn create_test_match(start_line: usize, end_line: usize) -> LicenseMatch { + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid: 0, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("test.txt".to_string()), - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token: start_line, end_token: end_line + 1, matcher: crate::license_detection::models::MatcherKind::Hash, diff --git a/src/license_detection/hash_match.rs b/src/license_detection/hash_match.rs index 75329acfe..7c0765ab8 100644 --- a/src/license_detection/hash_match.rs 
+++ b/src/license_detection/hash_match.rs @@ -10,6 +10,7 @@ use crate::license_detection::index::dictionary::{TokenId, TokenKind}; use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind}; use crate::license_detection::query::QueryRun; +use crate::models::LineNumber; pub const MATCH_HASH: MatcherKind = MatcherKind::Hash; @@ -62,9 +63,15 @@ pub fn hash_match(index: &LicenseIndex, query_run: &QueryRun) -> Vec text.clone(), - None => query.matched_text(m.start_line, m.end_line), + None => query.matched_text(m.start_line.get(), m.end_line.get()), }; let max_diff = if rule.relevance >= 80 { 1 } else { 0 }; @@ -551,6 +551,7 @@ mod tests { use crate::license_detection::models::Rule; use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::unknown_match::MATCH_UNKNOWN; + use crate::models::LineNumber; fn parse_rule_id(rule_identifier: &str) -> Option { let trimmed = rule_identifier.trim(); @@ -572,13 +573,15 @@ mod tests { let matched_len = end_line - start_line + 1; let rule_len = matched_len; let rid = parse_rule_id(rule_identifier).unwrap_or(0); + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token: start_line, end_token: end_line + 1, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -615,8 +618,12 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: start_token, - end_line: end_token.saturating_sub(1), + start_line: LineNumber::from_0_indexed(start_token), + end_line: if end_token == 0 { + LineNumber::ONE + } else { + 
LineNumber::from_0_indexed(end_token - 1) + }, start_token, end_token, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -1150,12 +1157,11 @@ mod tests { }); let mut m = create_test_match_with_tokens("#0", 0, 3, 3); - m.start_line = 1; - m.end_line = 50; - - let mut m2 = create_test_match_with_tokens("#0", 10, 13, 3); - m2.start_line = 1; - m2.end_line = 50; + m.start_line = LineNumber::ONE; + m.end_line = LineNumber::new(50).expect("valid"); + let mut m2 = create_test_match_with_tokens("#0", 0, 3, 3); + m2.start_line = LineNumber::ONE; + m2.end_line = LineNumber::new(50).expect("valid"); let matches = vec![m, m2]; let filtered = filter_short_matches_scattered_on_too_many_lines(&index, &matches); diff --git a/src/license_detection/match_refine/handle_overlaps.rs b/src/license_detection/match_refine/handle_overlaps.rs index 9bf7cb60d..eca9f94b9 100644 --- a/src/license_detection/match_refine/handle_overlaps.rs +++ b/src/license_detection/match_refine/handle_overlaps.rs @@ -428,6 +428,7 @@ mod tests { use super::*; use crate::license_detection::models::MatchCoordinates; use crate::license_detection::models::position_span::PositionSpan; + use crate::models::LineNumber; fn parse_rule_id(rule_identifier: &str) -> Option { let trimmed = rule_identifier.trim(); @@ -449,13 +450,15 @@ mod tests { let matched_len = end_line - start_line + 1; let rule_len = matched_len; let rid = parse_rule_id(rule_identifier).unwrap_or(0); + let start_line_ln = LineNumber::new(start_line).expect("valid start_line"); + let end_line_ln = LineNumber::new(end_line).expect("valid end_line"); LicenseMatch { rid, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line, - end_line, + start_line: start_line_ln, + end_line: end_line_ln, start_token: start_line, end_token: end_line + 1, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -492,8 +495,12 @@ mod tests { license_expression: "mit".to_string(), 
license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: start_token, - end_line: end_token.saturating_sub(1), + start_line: LineNumber::from_0_indexed(start_token), + end_line: if end_token == 0 { + LineNumber::ONE + } else { + LineNumber::from_0_indexed(end_token - 1) + }, start_token, end_token, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -529,8 +536,8 @@ mod tests { let (filtered, _) = filter_contained_matches(&matches); assert_eq!(filtered.len(), 1); - assert_eq!(filtered[0].start_line, 1); - assert_eq!(filtered[0].end_line, 20); + assert_eq!(filtered[0].start_line, LineNumber::ONE); + assert_eq!(filtered[0].end_line, LineNumber::new(20).expect("valid")); } #[test] @@ -544,8 +551,8 @@ mod tests { let (filtered, _) = filter_contained_matches(&matches); assert_eq!(filtered.len(), 1); - assert_eq!(filtered[0].start_line, 1); - assert_eq!(filtered[0].end_line, 30); + assert_eq!(filtered[0].start_line, LineNumber::ONE); + assert_eq!(filtered[0].end_line, LineNumber::new(30).expect("valid")); } #[test] @@ -617,7 +624,7 @@ mod tests { let (filtered, _) = filter_contained_matches(&matches); assert_eq!(filtered.len(), 1); - assert_eq!(filtered[0].end_line, 30); + assert_eq!(filtered[0].end_line, LineNumber::new(30).expect("valid")); } #[test] @@ -633,8 +640,8 @@ mod tests { let (filtered, _) = filter_contained_matches(&matches); assert_eq!(filtered.len(), 1); - assert_eq!(filtered[0].start_line, 1); - assert_eq!(filtered[0].end_line, 50); + assert_eq!(filtered[0].start_line, LineNumber::ONE); + assert_eq!(filtered[0].end_line, LineNumber::new(50).expect("valid")); } #[test] @@ -745,12 +752,12 @@ mod tests { #[test] fn test_filter_contained_matches_gpl_variant_zero_tokens() { let mut gpl_1_0 = create_test_match_with_tokens("#20560", 0, 0, 9); - gpl_1_0.start_line = 13; - gpl_1_0.end_line = 14; + gpl_1_0.start_line = LineNumber::new(13).expect("valid"); + gpl_1_0.end_line = LineNumber::new(14).expect("valid"); let mut gpl_2_0 = 
create_test_match_with_tokens("#16218", 0, 0, 22); - gpl_2_0.start_line = 13; - gpl_2_0.end_line = 15; + gpl_2_0.start_line = LineNumber::new(13).expect("valid"); + gpl_2_0.end_line = LineNumber::new(15).expect("valid"); let matches = vec![gpl_1_0.clone(), gpl_2_0.clone()]; @@ -934,9 +941,9 @@ mod tests { let (kept, _) = filter_overlapping_matches(matches, &index); assert_eq!(kept.len(), 3); - assert_eq!(kept[0].start_line, 1); - assert_eq!(kept[1].start_line, 25); - assert_eq!(kept[2].start_line, 40); + assert_eq!(kept[0].start_line, LineNumber::ONE); + assert_eq!(kept[1].start_line, LineNumber::new(25).expect("valid")); + assert_eq!(kept[2].start_line, LineNumber::new(40).expect("valid")); } #[test] @@ -1166,8 +1173,8 @@ mod tests { assert_eq!(to_keep.len(), 1); assert_eq!(to_keep[0].rule_identifier, "#2"); - assert_eq!(to_keep[0].start_line, 50); - assert_eq!(to_keep[0].end_line, 65); + assert_eq!(to_keep[0].start_line, LineNumber::new(50).expect("valid")); + assert_eq!(to_keep[0].end_line, LineNumber::new(65).expect("valid")); } #[test] diff --git a/src/license_detection/match_refine/merge.rs b/src/license_detection/match_refine/merge.rs index 953dc4e49..dbc3cd6d6 100644 --- a/src/license_detection/match_refine/merge.rs +++ b/src/license_detection/match_refine/merge.rs @@ -369,6 +369,7 @@ mod tests { use super::*; use crate::license_detection::index::LicenseIndex; use crate::license_detection::models::PositionSpan; + use crate::models::LineNumber; fn parse_rule_id(rule_identifier: &str) -> Option { let trimmed = rule_identifier.trim(); @@ -398,8 +399,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line, - end_line, + start_line: LineNumber::new(start_line).unwrap(), + end_line: LineNumber::new(end_line).unwrap(), start_token: start_line, end_token: end_line + 1, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -434,8 +435,8 @@ mod tests { license_expression: 
"mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: start_token, - end_line: end_token.saturating_sub(1), + start_line: LineNumber::from_0_indexed(start_token), + end_line: LineNumber::from_0_indexed(end_token.saturating_sub(1)), start_token, end_token, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -473,8 +474,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: start_token, - end_line: end_token.saturating_sub(1), + start_line: LineNumber::from_0_indexed(start_token), + end_line: LineNumber::from_0_indexed(end_token.saturating_sub(1)), start_token, end_token, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -550,8 +551,8 @@ mod tests { assert_eq!(merged.len(), 1); assert_eq!(merged[0].rule_identifier, "#1"); - assert_eq!(merged[0].start_line, 1); - assert_eq!(merged[0].end_line, 15); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(15).unwrap()); assert_eq!(merged[0].score, 0.9); } @@ -580,8 +581,8 @@ mod tests { assert_eq!(merged.len(), 1); assert_eq!(merged[0].rule_identifier, "#1"); - assert_eq!(merged[0].start_line, 1); - assert_eq!(merged[0].end_line, 20); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(20).unwrap()); assert_eq!(merged[0].score, 0.9); } @@ -673,8 +674,8 @@ mod tests { let merged = merge_overlapping_matches(&matches); assert_eq!(merged.len(), 1); - assert_eq!(merged[0].start_line, 1); - assert_eq!(merged[0].end_line, 15); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(15).unwrap()); } #[test] @@ -689,8 +690,8 @@ mod tests { let matches = vec![create_test_match("#1", 1, 10, 0.9, 90.0, 100)]; let merged = merge_overlapping_matches(&matches); assert_eq!(merged.len(), 1); - assert_eq!(merged[0].start_line, 1); - 
assert_eq!(merged[0].end_line, 10); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(10).unwrap()); } #[test] @@ -779,8 +780,8 @@ mod tests { let merged = merge_overlapping_matches(&matches); assert_eq!(merged.len(), 1); - assert_eq!(merged[0].start_line, 1); - assert_eq!(merged[0].end_line, 25); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(25).unwrap()); } #[test] @@ -805,10 +806,10 @@ mod tests { let merged = merge_overlapping_matches(&matches); assert_eq!(merged.len(), 2); - assert_eq!(merged[0].start_line, 1); - assert_eq!(merged[0].end_line, 10); - assert_eq!(merged[1].start_line, 20); - assert_eq!(merged[1].end_line, 30); + assert_eq!(merged[0].start_line, LineNumber::ONE); + assert_eq!(merged[0].end_line, LineNumber::new(10).unwrap()); + assert_eq!(merged[1].start_line, LineNumber::new(20).unwrap()); + assert_eq!(merged[1].end_line, LineNumber::new(30).unwrap()); } #[test] diff --git a/src/license_detection/match_refine/mod.rs b/src/license_detection/match_refine/mod.rs index a0a191039..5565ffc15 100644 --- a/src/license_detection/match_refine/mod.rs +++ b/src/license_detection/match_refine/mod.rs @@ -331,6 +331,7 @@ mod tests { use super::*; use crate::license_detection::models::MatchCoordinates; use crate::license_detection::models::position_span::PositionSpan; + use crate::models::LineNumber; fn parse_rule_id(rule_identifier: &str) -> Option { let trimmed = rule_identifier.trim(); @@ -357,8 +358,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line, - end_line, + start_line: LineNumber::new(start_line).unwrap(), + end_line: LineNumber::new(end_line).unwrap(), start_token: start_line, end_token: end_line + 1, matcher: crate::license_detection::models::MatcherKind::Aho, @@ -425,8 +426,8 @@ mod tests { assert_eq!(refined.len(), 2); let rule1_match = 
refined.iter().find(|m| m.rule_identifier == "#1").unwrap(); - assert_eq!(rule1_match.start_line, 1); - assert_eq!(rule1_match.end_line, 15); + assert_eq!(rule1_match.start_line, LineNumber::ONE); + assert_eq!(rule1_match.end_line, LineNumber::new(15).unwrap()); let rule2_match = refined.iter().find(|m| m.rule_identifier == "#2").unwrap(); assert_eq!(rule2_match.score, 80.0); @@ -545,8 +546,8 @@ mod tests { assert_eq!(refined.len(), 1); assert_eq!(refined[0].rule_identifier, "#1"); - assert_eq!(refined[0].start_line, 1); - assert_eq!(refined[0].end_line, 20); + assert_eq!(refined[0].start_line, LineNumber::ONE); + assert_eq!(refined[0].end_line, LineNumber::new(20).unwrap()); } #[test] diff --git a/src/license_detection/models/license_match.rs b/src/license_detection/models/license_match.rs index e8a0d8b82..0feb3d043 100644 --- a/src/license_detection/models/license_match.rs +++ b/src/license_detection/models/license_match.rs @@ -7,6 +7,7 @@ use std::str::FromStr; use crate::license_detection::models::RuleKind; use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::position_set::PositionSet; +use crate::models::LineNumber; /// Coordinate data for a license match. /// @@ -158,10 +159,10 @@ pub struct LicenseMatch { pub from_file: Option, /// Start line number (1-indexed) - pub start_line: usize, + pub start_line: LineNumber, /// End line number (1-indexed) - pub end_line: usize, + pub end_line: LineNumber, /// Start token position (0-indexed in query token stream) /// Used for dual-criteria match grouping with token gap threshold. 
@@ -275,8 +276,8 @@ impl Serialize for LicenseMatch { license_expression: &self.license_expression, license_expression_spdx: &self.license_expression_spdx, from_file: &self.from_file, - start_line: self.start_line, - end_line: self.end_line, + start_line: self.start_line.get(), + end_line: self.end_line.get(), start_token: self.start_token, end_token: self.end_token, matcher: self.matcher, @@ -312,8 +313,8 @@ impl Default for LicenseMatch { license_expression: String::new(), license_expression_spdx: None, from_file: None, - start_line: 0, - end_line: 0, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 0, matcher: MatcherKind::default(), diff --git a/src/license_detection/models/mod_tests.rs b/src/license_detection/models/mod_tests.rs index 7acca8053..a4067253e 100644 --- a/src/license_detection/models/mod_tests.rs +++ b/src/license_detection/models/mod_tests.rs @@ -7,6 +7,7 @@ mod tests { License, LicenseMatch, MatchCoordinates, MatcherKind, Rule, RuleKind, }; use crate::license_detection::position_set::PositionSet; + use crate::models::LineNumber; use crate::models::Match as OutputMatch; use std::collections::HashMap; @@ -95,8 +96,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("README.md".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), start_token: 0, end_token: 100, matcher: crate::license_detection::models::MatcherKind::Hash, @@ -501,8 +502,8 @@ mod tests { Some("MIT".to_string()) ); assert_eq!(match_result.from_file, Some("README.md".to_string())); - assert_eq!(match_result.start_line, 1); - assert_eq!(match_result.end_line, 5); + assert_eq!(match_result.start_line, LineNumber::ONE); + assert_eq!(match_result.end_line, LineNumber::new(5).unwrap()); assert_eq!(match_result.matcher, MatcherKind::Hash); assert!((match_result.score - 0.95).abs() < 0.001); } @@ -514,8 +515,8 @@ mod 
tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: 0, - end_line: 0, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 0, matcher: MatcherKind::Hash, @@ -537,7 +538,7 @@ mod tests { }; assert!(match_result.from_file.is_none()); - assert_eq!(match_result.start_line, 0); + assert_eq!(match_result.start_line, LineNumber::ONE); assert_eq!(match_result.score, 0.0); assert!(match_result.matched_text.is_none()); } @@ -595,8 +596,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(3), @@ -622,8 +623,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: Some("README.md".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), start_token: 0, end_token: 100, matcher: crate::license_detection::models::MatcherKind::Hash, @@ -881,13 +882,13 @@ mod tests { #[test] fn test_surround_true() { let outer = LicenseMatch { - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), ..create_license_match() }; let inner = LicenseMatch { - start_line: 5, - end_line: 15, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(15).unwrap(), ..create_license_match() }; assert!(outer.surround(&inner)); @@ -896,16 +897,16 @@ mod tests { #[test] fn test_surround_false_same_start() { let outer = LicenseMatch { - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), start_token: 1, end_token: 20, coordinates: MatchCoordinates::query_region(PositionSpan::range(1, 20)), ..create_license_match() }; let inner = LicenseMatch { - 
start_line: 1, - end_line: 15, + start_line: LineNumber::ONE, + end_line: LineNumber::new(15).unwrap(), start_token: 1, end_token: 15, coordinates: MatchCoordinates::query_region(PositionSpan::range(1, 15)), @@ -917,16 +918,16 @@ mod tests { #[test] fn test_surround_false_same_end() { let outer = LicenseMatch { - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), start_token: 1, end_token: 20, coordinates: MatchCoordinates::query_region(PositionSpan::range(1, 20)), ..create_license_match() }; let inner = LicenseMatch { - start_line: 5, - end_line: 20, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(20).unwrap(), start_token: 5, end_token: 20, coordinates: MatchCoordinates::query_region(PositionSpan::range(5, 20)), @@ -938,16 +939,16 @@ mod tests { #[test] fn test_surround_false_reversed() { let outer = LicenseMatch { - start_line: 5, - end_line: 15, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(15).unwrap(), start_token: 5, end_token: 15, coordinates: MatchCoordinates::query_region(PositionSpan::range(5, 15)), ..create_license_match() }; let inner = LicenseMatch { - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), start_token: 1, end_token: 20, coordinates: MatchCoordinates::query_region(PositionSpan::range(1, 20)), @@ -959,16 +960,16 @@ mod tests { #[test] fn test_surround_false_adjacent() { let first = LicenseMatch { - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).unwrap(), start_token: 1, end_token: 10, coordinates: MatchCoordinates::query_region(PositionSpan::range(1, 10)), ..create_license_match() }; let second = LicenseMatch { - start_line: 11, - end_line: 20, + start_line: LineNumber::new(11).unwrap(), + end_line: LineNumber::new(20).unwrap(), start_token: 11, end_token: 20, coordinates: MatchCoordinates::query_region(PositionSpan::range(11, 20)), diff --git 
a/src/license_detection/seq_match/matching.rs b/src/license_detection/seq_match/matching.rs index 491710aec..01c8b80da 100644 --- a/src/license_detection/seq_match/matching.rs +++ b/src/license_detection/seq_match/matching.rs @@ -5,6 +5,7 @@ use crate::license_detection::index::dictionary::TokenId; use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::models::{LicenseMatch, MatchCoordinates}; use crate::license_detection::query::QueryRun; +use crate::models::LineNumber; use bit_set::BitSet; use std::collections::HashMap; @@ -287,8 +288,14 @@ pub(crate) fn seq_match_with_candidates( let qend = qpos + mlen - 1; let abs_qpos = qpos + query_run.start; let abs_qend = qend + query_run.start; - let start_line = query_run.line_for_pos(abs_qpos).unwrap_or(1); - let end_line = query_run.line_for_pos(abs_qend).unwrap_or(start_line); + let start_line = query_run + .line_for_pos(abs_qpos) + .and_then(LineNumber::new) + .unwrap_or(LineNumber::ONE); + let end_line = query_run + .line_for_pos(abs_qend) + .and_then(LineNumber::new) + .unwrap_or(start_line); let qspan = PositionSpan::range(qpos + query_run.start, qpos + mlen + query_run.start); diff --git a/src/license_detection/seq_match/mod.rs b/src/license_detection/seq_match/mod.rs index 99facb8c2..8bb5c8a71 100644 --- a/src/license_detection/seq_match/mod.rs +++ b/src/license_detection/seq_match/mod.rs @@ -45,6 +45,7 @@ mod tests { use crate::license_detection::query::Query; use crate::license_detection::test_utils::create_test_index; use crate::license_detection::{TokenMultiset, TokenSet}; + use crate::models::LineNumber; use std::collections::HashMap; pub(super) fn create_seq_match_test_index() -> LicenseIndex { @@ -277,8 +278,8 @@ mod tests { "All matches should be for test-license" ); - let start_lines: Vec = matches.iter().map(|m| m.start_line).collect(); - let end_lines: Vec = matches.iter().map(|m| m.end_line).collect(); + let start_lines: Vec = matches.iter().map(|m| 
m.start_line.get()).collect(); + let end_lines: Vec = matches.iter().map(|m| m.end_line.get()).collect(); assert!( start_lines.iter().all(|&l| l >= 1), @@ -308,11 +309,13 @@ mod tests { let first_match = &matches[0]; assert_eq!( - first_match.start_line, 2, + first_match.start_line, + LineNumber::new(2).unwrap(), "Match should start on line 2 (where license tokens are), not line 1" ); assert_eq!( - first_match.end_line, 2, + first_match.end_line, + LineNumber::new(2).unwrap(), "Match should end on line 2 (where license tokens are), not line 3" ); @@ -323,7 +326,8 @@ mod tests { ); // Verify we can compute it from the query - let matched_text = query.matched_text(first_match.start_line, first_match.end_line); + let matched_text = + query.matched_text(first_match.start_line.get(), first_match.end_line.get()); assert!( matched_text.contains("license"), "Computed matched text should contain 'license'" @@ -352,11 +356,13 @@ mod tests { let first_match = &matches[0]; assert_eq!( - first_match.start_line, 2, + first_match.start_line, + LineNumber::new(2).unwrap(), "Partial match should start on line 2" ); assert_eq!( - first_match.end_line, 2, + first_match.end_line, + LineNumber::new(2).unwrap(), "Partial match should end on line 2" ); diff --git a/src/license_detection/spdx_lid/mod.rs b/src/license_detection/spdx_lid/mod.rs index 64b161d7c..d6f4b104d 100644 --- a/src/license_detection/spdx_lid/mod.rs +++ b/src/license_detection/spdx_lid/mod.rs @@ -21,6 +21,7 @@ use crate::license_detection::index::LicenseIndex; use crate::license_detection::models::position_span::PositionSpan; use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKind}; use crate::license_detection::query::Query; +use crate::models::LineNumber; pub const MATCH_SPDX_ID: MatcherKind = MatcherKind::SpdxId; @@ -324,10 +325,14 @@ pub fn spdx_lid_match(index: &LicenseIndex, query: &Query) -> Vec let matched_length = end_token.saturating_sub(*start_token); let match_coverage = 100.0; - 
let start_line = query.line_for_pos(*start_token).unwrap_or(1); + let start_line = query + .line_for_pos(*start_token) + .and_then(LineNumber::new) + .unwrap_or(LineNumber::ONE); let end_line = end_token .checked_sub(1) .and_then(|pos| query.line_for_pos(pos)) + .and_then(LineNumber::new) .unwrap_or(start_line); let rid = index diff --git a/src/license_detection/spdx_lid/test.rs b/src/license_detection/spdx_lid/test.rs index 1262bb3a8..b6e9d4250 100644 --- a/src/license_detection/spdx_lid/test.rs +++ b/src/license_detection/spdx_lid/test.rs @@ -5,6 +5,7 @@ mod tests { use crate::license_detection::query::Query; use crate::license_detection::spdx_lid::*; use crate::license_detection::test_utils::{create_mock_rule_simple, create_test_index}; + use crate::models::LineNumber; fn extract_cleaned_spdx_expressions(text: &str) -> Vec { text.lines() @@ -323,8 +324,8 @@ mod tests { assert_eq!(matches.len(), 1); assert_eq!(matches[0].license_expression, "mit"); assert_eq!(matches[0].license_expression_spdx, Some("MIT".to_string())); - assert_eq!(matches[0].start_line, 1); - assert_eq!(matches[0].end_line, 1); + assert_eq!(matches[0].start_line, LineNumber::ONE); + assert_eq!(matches[0].end_line, LineNumber::ONE); assert_eq!(matches[0].matcher, MATCH_SPDX_ID); assert_eq!(matches[0].matched_length, 4); assert_eq!(matches[0].rule_length, 4); @@ -429,8 +430,8 @@ mod tests { let matches = spdx_lid_match(&index, &query); assert_eq!(matches.len(), 1); - assert_eq!(matches[0].start_line, 1); - assert_eq!(matches[0].end_line, 1); + assert_eq!(matches[0].start_line, LineNumber::ONE); + assert_eq!(matches[0].end_line, LineNumber::ONE); assert_eq!(matches[0].matched_length, 3); assert_eq!(matches[0].rule_length, 3); assert_eq!(matches[0].score, 100.0); diff --git a/src/license_detection/tests.rs b/src/license_detection/tests.rs index 5965442de..16ff709bf 100644 --- a/src/license_detection/tests.rs +++ b/src/license_detection/tests.rs @@ -3,6 +3,7 @@ use once_cell::sync::Lazy; use 
std::sync::Once; use crate::license_detection::models::{MatchCoordinates, position_span::PositionSpan}; +use crate::models::LineNumber; static TEST_ENGINE: Lazy = Lazy::new(|| { LicenseDetectionEngine::from_embedded().expect("Should initialize from embedded artifact") @@ -462,7 +463,7 @@ fn test_engine_matched_text_populated() { for detection in &detections { for m in &detection.matches { assert!( - m.start_line > 0, + m.start_line >= LineNumber::ONE, "start_line should be populated for matcher {}", m.matcher ); diff --git a/src/license_detection/unknown_match.rs b/src/license_detection/unknown_match.rs index 202eae13d..eb5b72544 100644 --- a/src/license_detection/unknown_match.rs +++ b/src/license_detection/unknown_match.rs @@ -12,6 +12,7 @@ use crate::license_detection::models::{LicenseMatch, MatchCoordinates, MatcherKi use crate::license_detection::position_set::PositionSet; use crate::license_detection::query::Query; use crate::license_detection::tokenize::STOPWORDS; +use crate::models::LineNumber; pub const MATCH_UNKNOWN: MatcherKind = MatcherKind::Unknown; @@ -238,11 +239,17 @@ fn create_unknown_match_from_qspan(query: &Query, qspan: &PositionSet) -> Option let start = qspan.min_pos(); let end = qspan.max_pos() + 1; - let start_line = query.line_by_pos.get(start).copied().unwrap_or(1); + let start_line = query + .line_by_pos + .get(start) + .copied() + .and_then(LineNumber::new) + .unwrap_or(LineNumber::ONE); let end_line = query .line_by_pos .get(end.saturating_sub(1)) .copied() + .and_then(LineNumber::new) .unwrap_or(start_line); let qspan_positions: Vec = qspan.iter().collect(); @@ -288,8 +295,8 @@ fn create_unknown_match_from_qspan(query: &Query, qspan: &PositionSet) -> Option fn build_unknown_rule_text( query: &Query, qspan_positions: &[usize], - start_line: usize, - end_line: usize, + start_line: LineNumber, + end_line: LineNumber, ) -> String { let Some(&start_pos) = qspan_positions.first() else { return String::new(); @@ -305,8 +312,8 @@ fn 
build_unknown_rule_text( &matched_positions, start_pos, end_pos, - start_line, - end_line, + start_line.get(), + end_line.get(), ); let line_endings = collect_line_endings(&query.text); @@ -809,8 +816,8 @@ mod tests { license_expression: "test".to_string(), license_expression_spdx: Some("TEST".to_string()), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 10, matcher: MatcherKind::Aho, @@ -858,8 +865,8 @@ mod tests { license_expression: "test".to_string(), license_expression_spdx: Some("TEST".to_string()), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 5, end_token: 10, matcher: MatcherKind::Aho, @@ -906,8 +913,8 @@ mod tests { license_expression: "test".to_string(), license_expression_spdx: Some("TEST".to_string()), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 15, matcher: MatcherKind::Aho, @@ -989,8 +996,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 5, matcher: MatcherKind::Aho, @@ -1014,7 +1021,7 @@ mod tests { let matches = unknown_match(&index, &query, &known_matches); assert!( - matches.is_empty() || matches[0].start_line > 1, + matches.is_empty() || matches[0].start_line > LineNumber::ONE, "Should not re-detect known regions" ); } diff --git a/src/main_test.rs b/src/main_test.rs index 2678ad4ca..ceb9635c3 100644 --- a/src/main_test.rs +++ b/src/main_test.rs @@ -1,4 +1,5 @@ use super::*; +use crate::models::LineNumber; use clap::Parser; use serde_json::json; use std::fs; @@ -317,8 +318,8 @@ fn from_json_loaded_manifest_detections_can_be_recomputed_into_top_level_uniques license_expression: "mit".to_string(), 
license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -381,8 +382,8 @@ fn from_json_recomputes_top_level_uniques_even_without_shaping_flags() { license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -447,8 +448,8 @@ fn from_json_recomputes_top_level_outputs_after_manifest_reference_following() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -472,8 +473,8 @@ fn from_json_recomputes_top_level_outputs_after_manifest_reference_following() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(50), @@ -564,8 +565,8 @@ fn from_json_recomputes_top_level_outputs_after_package_inheritance_following() from_file: Some( "venv/lib/python3.11/site-packages/demo-1.0.dist-info/METADATA".to_string(), ), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1), @@ -594,8 +595,8 @@ fn from_json_recomputes_top_level_outputs_after_package_inheritance_following() 
license_expression: "free-unknown".to_string(), license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(), from_file: Some("venv/lib/python3.11/site-packages/locale/django.po".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(11), diff --git a/src/models/file_info.rs b/src/models/file_info.rs index c61ee97c6..ec1cc21ec 100644 --- a/src/models/file_info.rs +++ b/src/models/file_info.rs @@ -11,6 +11,7 @@ use sha1::{Digest, Sha1}; use super::DatasourceId; use super::GitSha1; +use super::LineNumber; use super::Md5Digest; use super::PackageType; use super::Sha1Digest; @@ -677,8 +678,8 @@ pub struct Match { pub license_expression_spdx: String, #[serde(skip_serializing_if = "Option::is_none")] pub from_file: Option, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, #[serde(skip_serializing_if = "Option::is_none")] pub matcher: Option, pub score: f64, @@ -702,22 +703,22 @@ pub struct Match { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Copyright { pub copyright: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Holder { pub holder: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Author { pub author: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } /// Package dependency information with version constraints. 
@@ -1291,8 +1292,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -1369,8 +1370,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -1620,15 +1621,15 @@ pub fn build_package_uid(purl: &str) -> String { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct OutputEmail { pub email: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct OutputURL { pub url: String, - pub start_line: usize, - pub end_line: usize, + pub start_line: LineNumber, + pub end_line: LineNumber, } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] diff --git a/src/models/line_number.rs b/src/models/line_number.rs new file mode 100644 index 000000000..54333a61b --- /dev/null +++ b/src/models/line_number.rs @@ -0,0 +1,72 @@ +use std::num::NonZeroUsize; +use std::ops::{Add, AddAssign, Sub}; + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct LineNumber(NonZeroUsize); + +impl LineNumber { + pub const ONE: Self = match NonZeroUsize::new(1) { + Some(n) => Self(n), + None => unreachable!(), + }; + + pub fn new(n: usize) -> Option { + NonZeroUsize::new(n).map(Self) + } + + pub fn from_0_indexed(i: usize) -> Self { + Self(NonZeroUsize::new(i + 1).expect("0-indexed line overflow")) + } + + pub fn get(self) -> usize { + self.0.get() + } + + pub fn 
saturating_add(self, n: usize) -> Self { + Self(NonZeroUsize::new(self.0.get().saturating_add(n)).expect("LineNumber overflow")) + } + + pub fn saturating_sub(self, n: usize) -> usize { + self.0.get().saturating_sub(n) + } + + pub fn abs_diff(self, other: Self) -> usize { + self.0.get().abs_diff(other.0.get()) + } +} + +impl Add for LineNumber { + type Output = Self; + fn add(self, rhs: usize) -> Self::Output { + Self(NonZeroUsize::new(self.0.get() + rhs).expect("LineNumber overflow")) + } +} + +impl AddAssign for LineNumber { + fn add_assign(&mut self, rhs: usize) { + *self = *self + rhs; + } +} + +impl Sub for LineNumber { + type Output = usize; + fn sub(self, rhs: usize) -> Self::Output { + self.0.get() - rhs + } +} + +impl Sub for LineNumber { + type Output = usize; + fn sub(self, rhs: Self) -> Self::Output { + self.0.get() - rhs.0.get() + } +} + +impl std::fmt::Display for LineNumber { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/src/models/mod.rs b/src/models/mod.rs index d2213cba0..16b0e115b 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -1,6 +1,7 @@ mod datasource_id; mod digest; pub(crate) mod file_info; +mod line_number; mod output; mod package_type; @@ -11,6 +12,7 @@ pub use file_info::{ LicenseDetection, LicensePolicyEntry, Match, OutputEmail, OutputURL, Package, PackageData, Party, ResolvedPackage, TopLevelDependency, }; +pub use line_number::LineNumber; pub use package_type::PackageType; #[cfg(test)] diff --git a/src/output/debian.rs b/src/output/debian.rs index 132eb3877..7f7a33e2d 100644 --- a/src/output/debian.rs +++ b/src/output/debian.rs @@ -118,7 +118,7 @@ fn unique_license_texts(detections: &[crate::models::LicenseDetection]) -> Vec<& #[cfg(test)] mod tests { use super::{detected_license_expression, unique_license_texts}; - use crate::models::{FileInfo, FileType, LicenseDetection, Match}; + use crate::models::{FileInfo, FileType, LicenseDetection, LineNumber, Match}; 
#[test] fn unique_license_texts_deduplicates_by_region_and_rule() { @@ -130,8 +130,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(3), @@ -147,8 +147,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(3), diff --git a/src/output/html.rs b/src/output/html.rs index 0db7023f6..b642be87c 100644 --- a/src/output/html.rs +++ b/src/output/html.rs @@ -83,8 +83,8 @@ pub(crate) fn write_html_report(output: &Output, writer: &mut dyn Write) -> io:: for c in &file.copyrights { let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); - row.insert("start".to_string(), c.start_line.to_string()); - row.insert("end".to_string(), c.end_line.to_string()); + row.insert("start".to_string(), c.start_line.get().to_string()); + row.insert("end".to_string(), c.end_line.get().to_string()); row.insert("what".to_string(), "copyright".to_string()); row.insert("value".to_string(), c.copyright.clone()); license_copyright_rows.push(row); @@ -93,8 +93,8 @@ pub(crate) fn write_html_report(output: &Output, writer: &mut dyn Write) -> io:: for m in &detection.matches { let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); - row.insert("start".to_string(), m.start_line.to_string()); - row.insert("end".to_string(), m.end_line.to_string()); + row.insert("start".to_string(), m.start_line.get().to_string()); + row.insert("end".to_string(), m.end_line.get().to_string()); row.insert("what".to_string(), "license".to_string()); 
row.insert("value".to_string(), detection.license_expression.clone()); license_copyright_rows.push(row); @@ -105,32 +105,32 @@ pub(crate) fn write_html_report(output: &Output, writer: &mut dyn Write) -> io:: let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); row.insert("holder".to_string(), h.holder.clone()); - row.insert("start".to_string(), h.start_line.to_string()); - row.insert("end".to_string(), h.end_line.to_string()); + row.insert("start".to_string(), h.start_line.get().to_string()); + row.insert("end".to_string(), h.end_line.get().to_string()); holder_rows.push(row); } for a in &file.authors { let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); row.insert("author".to_string(), a.author.clone()); - row.insert("start".to_string(), a.start_line.to_string()); - row.insert("end".to_string(), a.end_line.to_string()); + row.insert("start".to_string(), a.start_line.get().to_string()); + row.insert("end".to_string(), a.end_line.get().to_string()); author_rows.push(row); } for e in &file.emails { let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); row.insert("email".to_string(), e.email.clone()); - row.insert("start".to_string(), e.start_line.to_string()); - row.insert("end".to_string(), e.end_line.to_string()); + row.insert("start".to_string(), e.start_line.get().to_string()); + row.insert("end".to_string(), e.end_line.get().to_string()); email_rows.push(row); } for u in &file.urls { let mut row = BTreeMap::new(); row.insert("path".to_string(), file.path.clone()); row.insert("url".to_string(), u.url.clone()); - row.insert("start".to_string(), u.start_line.to_string()); - row.insert("end".to_string(), u.end_line.to_string()); + row.insert("start".to_string(), u.start_line.get().to_string()); + row.insert("end".to_string(), u.end_line.get().to_string()); url_rows.push(row); } diff --git a/src/output/mod.rs b/src/output/mod.rs index 0ebe33bd2..3c37ebfbd 100644 --- 
a/src/output/mod.rs +++ b/src/output/mod.rs @@ -119,8 +119,8 @@ mod tests { use crate::models::{ Author, Copyright, ExtraData, FileInfo, FileType, GitSha1, Header, Holder, - LicenseDetection, Match, Md5Digest, OutputEmail, OutputURL, PackageData, Sha256Digest, - SystemEnvironment, + LicenseDetection, LineNumber, Match, Md5Digest, OutputEmail, OutputURL, PackageData, + Sha256Digest, SystemEnvironment, }; #[test] @@ -215,8 +215,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("src/main.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(1), @@ -474,8 +474,8 @@ mod tests { license_expression_spdx: "LicenseRef-scancode-unknown-license-reference" .to_string(), from_file: Some("src/main.rs".to_string()), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(4), @@ -953,8 +953,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("src/main.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1224,8 +1224,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, @@ -1243,28 +1243,28 @@ mod tests { vec![], vec![Copyright { copyright: "Copyright (c) Example".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![Holder { holder: "Example Org".to_string(), - start_line: 1, - end_line: 1, + start_line: 
LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![Author { author: "Jane Doe".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![OutputEmail { email: "jane@example.com".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![OutputURL { url: "https://example.com".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![], vec![], diff --git a/src/output/spdx.rs b/src/output/spdx.rs index 0593ee34d..ab58b8b86 100644 --- a/src/output/spdx.rs +++ b/src/output/spdx.rs @@ -483,7 +483,7 @@ fn spdx_ids_from_expression(expression: &str) -> Vec { #[cfg(test)] mod tests { use super::*; - use crate::models::{LicenseDetection, PackageData, PackageType}; + use crate::models::{LicenseDetection, LineNumber, PackageData, PackageType}; #[test] fn spdx_file_license_info_includes_manifest_package_data_detections() { @@ -522,8 +522,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), diff --git a/src/parsers/debian.rs b/src/parsers/debian.rs index aca32da37..3ded837a4 100644 --- a/src/parsers/debian.rs +++ b/src/parsers/debian.rs @@ -38,8 +38,8 @@ use packageurl::PackageUrl; use regex::Regex; use crate::models::{ - DatasourceId, Dependency, FileReference, LicenseDetection, Md5Digest, PackageData, PackageType, - Party, + DatasourceId, Dependency, FileReference, LicenseDetection, LineNumber, Md5Digest, PackageData, + PackageType, Party, }; use crate::parsers::rfc822::{self, Rfc822Metadata}; use crate::parsers::utils::{read_file_to_string, split_name_email}; @@ -1595,10 +1595,11 @@ fn build_primary_license_detection( line_no: 
usize, ) -> LicenseDetection { let normalized = normalize_debian_license_name(license_name); + let line = LineNumber::new(line_no).unwrap(); build_declared_license_detection( &normalized, - DeclaredLicenseMatchMetadata::new(&matched_text, line_no, line_no), + DeclaredLicenseMatchMetadata::new(&matched_text, line, line), ) } @@ -3268,8 +3269,8 @@ License: LGPL-2.1 primary.matches[0].matched_text.as_deref(), Some("License: GPL-2+") ); - assert_eq!(primary.matches[0].start_line, 47); - assert_eq!(primary.matches[0].end_line, 47); + assert_eq!(primary.matches[0].start_line, LineNumber::new(47).unwrap()); + assert_eq!(primary.matches[0].end_line, LineNumber::new(47).unwrap()); } #[test] @@ -3285,12 +3286,12 @@ License: LGPL-2.1 primary.matches[0].matched_text.as_deref(), Some("License: LGPL-2.1") ); - assert_eq!(primary.matches[0].start_line, 11); + assert_eq!(primary.matches[0].start_line, LineNumber::new(11).unwrap()); let ordered_lines: Vec = pkg .other_license_detections .iter() - .map(|detection| detection.matches[0].start_line) + .map(|detection| detection.matches[0].start_line.get()) .collect(); assert_eq!(ordered_lines, vec![15, 19, 23, 25]); @@ -3333,8 +3334,11 @@ License: LGPL-2.1 .rev() .find(|detection| detection.matches[0].matched_text.as_deref() == Some("License: Zlib")) .expect("bottom standalone Zlib license paragraph should be detected"); - assert_eq!(last_zlib.matches[0].start_line, 732); - assert_eq!(last_zlib.matches[0].end_line, 732); + assert_eq!( + last_zlib.matches[0].start_line, + LineNumber::new(732).unwrap() + ); + assert_eq!(last_zlib.matches[0].end_line, LineNumber::new(732).unwrap()); } #[test] @@ -3349,8 +3353,8 @@ License: LGPL-2.1 primary.matches[0].matched_text.as_deref(), Some("License: LGPL-3+ or GPL-2+") ); - assert_eq!(primary.matches[0].start_line, 8); - assert_eq!(primary.matches[0].end_line, 8); + assert_eq!(primary.matches[0].start_line, LineNumber::new(8).unwrap()); + assert_eq!(primary.matches[0].end_line, 
LineNumber::new(8).unwrap()); assert!(pkg.other_license_detections.iter().any(|detection| { detection.matches[0].matched_text.as_deref() == Some("License: GPL-2+") @@ -3368,7 +3372,7 @@ License: LGPL-2.1 primary.matches[0].matched_text.as_deref(), Some("License: GPL-2+") ); - assert_eq!(primary.matches[0].start_line, 7); + assert_eq!(primary.matches[0].start_line, LineNumber::new(7).unwrap()); } #[test] diff --git a/src/parsers/license_normalization.rs b/src/parsers/license_normalization.rs index fcc5801c2..685ff38ba 100644 --- a/src/parsers/license_normalization.rs +++ b/src/parsers/license_normalization.rs @@ -7,7 +7,7 @@ use crate::license_detection::expression::{ LicenseExpression, parse_expression, simplify_expression, }; use crate::license_detection::index::LicenseIndex; -use crate::models::{LicenseDetection, Match, PackageData}; +use crate::models::{LicenseDetection, LineNumber, Match, PackageData}; use crate::utils::spdx::{ExpressionRelation, combine_license_expressions_with_relation}; pub(crate) const PARSER_DECLARED_MATCHER: &str = "parser-declared-license"; @@ -46,13 +46,13 @@ impl NormalizedDeclaredLicense { #[derive(Debug, Clone, Copy)] pub(crate) struct DeclaredLicenseMatchMetadata<'a> { pub(crate) matched_text: &'a str, - pub(crate) start_line: usize, - pub(crate) end_line: usize, + pub(crate) start_line: LineNumber, + pub(crate) end_line: LineNumber, pub(crate) referenced_filenames: Option<&'a [&'a str]>, } impl<'a> DeclaredLicenseMatchMetadata<'a> { - pub(crate) fn new(matched_text: &'a str, start_line: usize, end_line: usize) -> Self { + pub(crate) fn new(matched_text: &'a str, start_line: LineNumber, end_line: LineNumber) -> Self { Self { matched_text, start_line, @@ -67,7 +67,7 @@ impl<'a> DeclaredLicenseMatchMetadata<'a> { } pub(crate) fn single_line(matched_text: &'a str) -> Self { - Self::new(matched_text, 1, 1) + Self::new(matched_text, LineNumber::ONE, LineNumber::ONE) } } @@ -298,8 +298,8 @@ pub(crate) fn 
finalize_package_declared_license_references(package_data: &mut Pa license_expression_spdx: "LicenseRef-scancode-unknown-license-reference" .to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some(PARSER_DECLARED_MATCHER.to_string()), score: 100.0, matched_length: Some(statement.split_whitespace().count()), @@ -643,14 +643,21 @@ mod tests { fn test_build_declared_license_detection_uses_parser_matcher() { let detection = build_declared_license_detection( &NormalizedDeclaredLicense::new("mit", "MIT"), - DeclaredLicenseMatchMetadata::new("MIT", 4, 4), + DeclaredLicenseMatchMetadata::new( + "MIT", + LineNumber::new(4).unwrap(), + LineNumber::new(4).unwrap(), + ), ); assert_eq!( detection.matches[0].matcher.as_deref(), Some(PARSER_DECLARED_MATCHER) ); - assert_eq!(detection.matches[0].start_line, 4); + assert_eq!( + detection.matches[0].start_line, + LineNumber::new(4).expect("valid") + ); assert_eq!(detection.matches[0].matched_text.as_deref(), Some("MIT")); } } diff --git a/src/post_processing/classify_test.rs b/src/post_processing/classify_test.rs index 26c491090..b19278841 100644 --- a/src/post_processing/classify_test.rs +++ b/src/post_processing/classify_test.rs @@ -2,7 +2,9 @@ use std::fs; use super::test_utils::{dir, file, package, scan_and_assemble_with_keyfiles}; use super::*; -use crate::models::{Copyright, DatasourceId, FileReference, Holder, Match, Package, PackageType}; +use crate::models::{ + Copyright, DatasourceId, FileReference, Holder, LineNumber, Match, Package, PackageType, +}; #[test] fn classify_key_files_marks_nested_ruby_license_from_file_references() { @@ -29,13 +31,13 @@ fn classify_key_files_marks_nested_ruby_license_from_file_references() { license_file.license_expression = Some("Apache-2.0".to_string()); license_file.copyrights = vec![Copyright { copyright: "Copyright (c) 2019 Chef Software Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: 
LineNumber::ONE, + end_line: LineNumber::ONE, }]; license_file.holders = vec![Holder { holder: "Chef Software Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; license_file.license_detections = vec![crate::models::LicenseDetection { license_expression: "apache-2.0".to_string(), @@ -44,8 +46,8 @@ fn classify_key_files_marks_nested_ruby_license_from_file_references() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("inspec-6.8.2/inspec-bin/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: None, score: 100.0, matched_length: Some(161), diff --git a/src/post_processing/mod.rs b/src/post_processing/mod.rs index e96b4bc57..2efe6cc2d 100644 --- a/src/post_processing/mod.rs +++ b/src/post_processing/mod.rs @@ -550,6 +550,7 @@ fn is_good_match(license_match: &Match) -> bool { #[cfg(test)] mod tests { use super::is_good_match; + use crate::models::LineNumber; use crate::models::file_info::Match; fn make_match(score: f64, coverage: Option, relevance: Option) -> Match { @@ -557,8 +558,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score, matched_length: Some(3), diff --git a/src/post_processing/output_test.rs b/src/post_processing/output_test.rs index 97595961f..0127cca26 100644 --- a/src/post_processing/output_test.rs +++ b/src/post_processing/output_test.rs @@ -6,7 +6,9 @@ use super::*; use crate::assembly; use crate::license_detection::index::{IndexedRuleMetadata, LicenseIndex}; use crate::license_detection::models::{License as RuntimeLicense, Rule, RuleKind}; -use crate::models::{Copyright, Holder, Match, Package, PackageData, PackageType, Tallies}; +use 
crate::models::{ + Copyright, Holder, LineNumber, Match, Package, PackageData, PackageType, Tallies, +}; use crate::scan_result_shaping::normalize_paths; use serde_json::json; @@ -120,8 +122,8 @@ fn collect_top_level_license_references_includes_clues_packages_and_sorted_dedup license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -140,8 +142,8 @@ fn collect_top_level_license_references_includes_clues_packages_and_sorted_dedup license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(4), @@ -296,8 +298,8 @@ fn collect_top_level_license_references_marks_synthetic_spdx_rules() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-spdx-id".to_string()), score: 100.0, matched_length: Some(1), @@ -346,8 +348,8 @@ fn collect_top_level_license_references_applies_custom_license_url_template() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -411,8 +413,8 @@ fn collect_top_level_license_references_preserves_rule_metadata() { license_expression: "mit".to_string(), 
license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(2), @@ -454,8 +456,8 @@ fn apply_local_file_reference_following_resolves_root_license_file() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -480,8 +482,8 @@ fn apply_local_file_reference_following_resolves_root_license_file() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/src/notice.js".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -528,8 +530,8 @@ fn apply_local_file_reference_following_prefers_root_license_for_imperfect_subdi license_expression: "npsl-exception-0.95".to_string(), license_expression_spdx: "LicenseRef-scancode-npsl-exception-0.95".to_string(), from_file: Some("LICENSE".to_string()), - start_line: 1, - end_line: 582, + start_line: LineNumber::ONE, + end_line: LineNumber::new(582).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(4720), @@ -554,8 +556,8 @@ fn apply_local_file_reference_following_prefers_root_license_for_imperfect_subdi license_expression: "bsd-new".to_string(), license_expression_spdx: "BSD-3-Clause".to_string(), from_file: Some("third_party/LICENSE".to_string()), - start_line: 1, - end_line: 30, + start_line: LineNumber::ONE, + end_line: LineNumber::new(30).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, 
matched_length: Some(150), @@ -580,8 +582,8 @@ fn apply_local_file_reference_following_prefers_root_license_for_imperfect_subdi license_expression: "gpl-1.0-plus OR mit".to_string(), license_expression_spdx: "GPL-1.0-or-later OR MIT".to_string(), from_file: Some("src/FPEngine.h".to_string()), - start_line: 49, - end_line: 57, + start_line: LineNumber::new(49).unwrap(), + end_line: LineNumber::new(57).unwrap(), matcher: Some("3-seq".to_string()), score: 41.79, matched_length: Some(28), @@ -641,8 +643,8 @@ fn apply_local_file_reference_following_does_not_reuse_followed_license_as_secon license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -669,8 +671,8 @@ fn apply_local_file_reference_following_does_not_reuse_followed_license_as_secon license_expression_spdx: "LicenseRef-scancode-unknown-license-reference" .to_string(), from_file: Some("project/ncat/LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -686,8 +688,8 @@ fn apply_local_file_reference_following_does_not_reuse_followed_license_as_secon license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -713,8 +715,8 @@ fn apply_local_file_reference_following_does_not_reuse_followed_license_as_secon license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: 
Some("project/ncat/ncat_core.h".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -766,8 +768,8 @@ fn apply_local_file_reference_following_requires_exact_filename_match() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -792,8 +794,8 @@ fn apply_local_file_reference_following_requires_exact_filename_match() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/src/notice.js".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -835,8 +837,8 @@ fn apply_local_file_reference_following_does_not_search_unrelated_top_level_dire license_expression: "bsd-new".to_string(), license_expression_spdx: "BSD-3-Clause".to_string(), from_file: Some("libssh2/COPYING".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -861,8 +863,8 @@ fn apply_local_file_reference_following_does_not_search_unrelated_top_level_dire license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("docs/3rd-party-licenses.txt".to_string()), - start_line: 10, - end_line: 10, + start_line: LineNumber::new(10).unwrap(), + end_line: LineNumber::new(10).unwrap(), matcher: 
Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -907,8 +909,8 @@ fn apply_local_file_reference_following_drops_unknown_intro_from_resolved_target license_expression_spdx: "LicenseRef-scancode-unknown-license-reference" .to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 50.0, matched_length: Some(2), @@ -930,8 +932,8 @@ fn apply_local_file_reference_following_drops_unknown_intro_from_resolved_target license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 5, - end_line: 205, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(205).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1584), @@ -957,8 +959,8 @@ fn apply_local_file_reference_following_drops_unknown_intro_from_resolved_target license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/src/notice.js".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -1009,8 +1011,8 @@ fn apply_local_file_reference_following_resolves_files_beside_manifest() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/demo.dist-info/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -1036,8 +1038,8 @@ fn apply_local_file_reference_following_resolves_files_beside_manifest() { license_expression: 
"unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/demo/__init__.py".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ -1080,8 +1082,8 @@ fn apply_package_reference_following_resolves_manifest_origin_local_file() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 5, - end_line: 5, + start_line: LineNumber::new(5).unwrap(), + end_line: LineNumber::new(5).unwrap(), matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -1114,8 +1116,8 @@ fn apply_package_reference_following_resolves_manifest_origin_local_file() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -1165,8 +1167,8 @@ fn apply_package_reference_following_resolves_absolute_rootfs_license_reference( license_expression: "gpl-2.0".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("usr/share/common-licenses/GPL-2".to_string()), - start_line: 1, - end_line: 339, + start_line: LineNumber::ONE, + end_line: LineNumber::new(339).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(2931), @@ -1191,8 +1193,8 @@ fn apply_package_reference_following_resolves_absolute_rootfs_license_reference( license_expression: "gpl-2.0-plus".to_string(), license_expression_spdx: "GPL-2.0-or-later".to_string(), from_file: Some("usr/sbin/service".to_string()), - start_line: 16, - 
end_line: 31, + start_line: LineNumber::new(16).unwrap(), + end_line: LineNumber::new(31).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(139), @@ -1269,8 +1271,8 @@ fn apply_package_reference_following_falls_back_to_root_when_package_missing() { license_expression: "gpl-3.0".to_string(), license_expression_spdx: "GPL-3.0-only".to_string(), from_file: Some("project/COPYING".to_string()), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(50), @@ -1295,8 +1297,8 @@ fn apply_package_reference_following_falls_back_to_root_when_package_missing() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/po/en_US.po".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(5), @@ -1338,8 +1340,8 @@ fn apply_package_reference_following_falls_back_past_nested_root_to_repo_root() license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(100), @@ -1364,8 +1366,8 @@ fn apply_package_reference_following_falls_back_past_nested_root_to_repo_root() license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("docs/man-xlate/nmap-id.1".to_string()), - start_line: 100, - end_line: 100, + start_line: LineNumber::new(100).unwrap(), + end_line: LineNumber::new(100).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(2), @@ 
-1414,8 +1416,8 @@ fn apply_package_reference_following_inherits_license_from_package_context() { license_expression: "bsd-new".to_string(), license_expression_spdx: "BSD-3-Clause".to_string(), from_file: Some("project/PKG-INFO".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 99.0, matched_length: Some(5), @@ -1441,8 +1443,8 @@ fn apply_package_reference_following_inherits_license_from_package_context() { license_expression: "free-unknown".to_string(), license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(), from_file: Some("project/locale/django.po".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(11), @@ -1489,8 +1491,8 @@ fn apply_package_reference_following_falls_back_to_root_for_missing_package_refe license_expression: "gpl-3.0".to_string(), license_expression_spdx: "GPL-3.0-only".to_string(), from_file: Some("project/COPYING".to_string()), - start_line: 1, - end_line: 10, + start_line: LineNumber::ONE, + end_line: LineNumber::new(10).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(50), @@ -1515,8 +1517,8 @@ fn apply_package_reference_following_falls_back_to_root_for_missing_package_refe license_expression: "free-unknown".to_string(), license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(), from_file: Some("project/po/en_US.po".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(5), @@ -1565,8 +1567,8 @@ fn apply_package_reference_following_leaves_ambiguous_multi_package_file_unresol license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/a/PKG-INFO".to_string()), - start_line: 1, - end_line: 1, 
+ start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(5), @@ -1591,8 +1593,8 @@ fn apply_package_reference_following_leaves_ambiguous_multi_package_file_unresol license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("project/b/PKG-INFO".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(5), @@ -1618,8 +1620,8 @@ fn apply_package_reference_following_leaves_ambiguous_multi_package_file_unresol license_expression: "free-unknown".to_string(), license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(), from_file: Some("project/shared/locale.po".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(11), @@ -1661,8 +1663,8 @@ fn collect_top_level_license_detections_groups_file_detections_and_preserves_pat license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1686,8 +1688,8 @@ fn collect_top_level_license_detections_groups_file_detections_and_preserves_pat license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/other.rs".to_string()), - start_line: 4, - end_line: 6, + start_line: LineNumber::new(4).unwrap(), + end_line: LineNumber::new(6).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1711,8 +1713,8 @@ fn collect_top_level_license_detections_groups_file_detections_and_preserves_pat license_expression: 
"apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("project/src/apache.rs".to_string()), - start_line: 1, - end_line: 12, + start_line: LineNumber::ONE, + end_line: LineNumber::new(12).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(120), @@ -1757,8 +1759,8 @@ fn collect_top_level_license_detections_counts_same_identifier_regions_in_one_fi license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1780,8 +1782,8 @@ fn collect_top_level_license_detections_counts_same_identifier_regions_in_one_fi license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 20, - end_line: 25, + start_line: LineNumber::new(20).unwrap(), + end_line: LineNumber::new(25).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(12), @@ -1816,8 +1818,8 @@ fn collect_top_level_license_detections_deduplicates_identical_regions() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1839,8 +1841,8 @@ fn collect_top_level_license_detections_deduplicates_identical_regions() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(10), @@ -1874,8 
+1876,8 @@ fn collect_top_level_license_detections_recomputes_empty_expression_from_matches license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1910,8 +1912,8 @@ fn collect_top_level_license_detections_includes_package_origin_detections() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -1933,8 +1935,8 @@ fn collect_top_level_license_detections_includes_package_origin_detections() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: None, - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -1974,8 +1976,8 @@ fn collect_top_level_license_detections_prefers_later_logged_representative() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1999,8 +2001,8 @@ fn collect_top_level_license_detections_prefers_later_logged_representative() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/other.rs".to_string()), - start_line: 4, - end_line: 6, + start_line: LineNumber::new(4).unwrap(), + end_line: LineNumber::new(6).unwrap(), matcher: 
Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -2370,8 +2372,8 @@ fn create_output_preserves_top_level_license_detections_from_context() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(20), @@ -2465,8 +2467,8 @@ fn create_output_gates_summary_tallies_and_generated_sections() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some(license_rel.clone()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -2599,8 +2601,8 @@ fn create_output_score_only_keeps_clarity_without_full_summary_fields() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -2666,8 +2668,8 @@ fn create_output_preserves_file_level_license_clues_in_json_shape() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(19), @@ -2844,13 +2846,13 @@ fn create_output_promotes_package_metadata_without_summary_flags() { license.for_packages = vec![package_uid.clone()]; license.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: 
LineNumber::ONE, + end_line: LineNumber::ONE, }]; license.holders = vec![Holder { holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let package = Package { package_uid, @@ -2922,8 +2924,8 @@ fn create_output_summary_still_resolves_after_strip_root_normalization() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/demo.gemspec".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-spdx-id".to_string()), score: 100.0, matched_length: Some(1), diff --git a/src/post_processing/reference_following.rs b/src/post_processing/reference_following.rs index 9daf3281b..6f1747bcc 100644 --- a/src/post_processing/reference_following.rs +++ b/src/post_processing/reference_following.rs @@ -179,7 +179,7 @@ fn public_detection_region_key( .iter() .map(|match_item| match_item.end_line) .max()?; - Some((owning_path.to_string(), start_line, end_line)) + Some((owning_path.to_string(), start_line.get(), end_line.get())) } pub(super) fn build_reference_follow_snapshot( @@ -1217,7 +1217,7 @@ fn internal_match_to_public( #[cfg(test)] mod tests { use super::{apply_package_reference_following, collect_top_level_license_detections}; - use crate::models::Match; + use crate::models::{LineNumber, Match}; use crate::post_processing::test_utils::file; #[test] @@ -1230,8 +1230,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1255,8 +1255,8 @@ mod tests { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/other.rs".to_string()), - start_line: 4, - 
end_line: 6, + start_line: LineNumber::new(4).unwrap(), + end_line: LineNumber::new(6).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -1316,8 +1316,8 @@ mod tests { license_expression: "ofl-1.1".to_string(), license_expression_spdx: "OFL-1.1".to_string(), from_file: Some("fonts/OFL.txt".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: LineNumber::new(3).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(10), diff --git a/src/post_processing/summary/test.rs b/src/post_processing/summary/test.rs index f3cd32ab9..ed6580303 100644 --- a/src/post_processing/summary/test.rs +++ b/src/post_processing/summary/test.rs @@ -5,7 +5,8 @@ use super::super::package_metadata_promotion::promote_package_metadata_from_key_ use super::super::test_utils::{dir, file, package}; use super::*; use crate::models::{ - Copyright, DatasourceId, FileReference, Holder, Match, Package, PackageType, TallyEntry, + Copyright, DatasourceId, FileReference, Holder, LineNumber, Match, Package, PackageType, + TallyEntry, }; #[test] @@ -38,8 +39,8 @@ fn key_file_license_clues_feed_summary_without_mutating_package_license_provenan license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("inspec-6.8.2/inspec-bin/LICENSE".to_string()), - start_line: 1, - end_line: 20, + start_line: LineNumber::ONE, + end_line: LineNumber::new(20).unwrap(), matcher: None, score: 100.0, matched_length: Some(161), @@ -56,13 +57,13 @@ fn key_file_license_clues_feed_summary_without_mutating_package_license_provenan }]; license_file.copyrights = vec![Copyright { copyright: "Copyright (c) 2019 Chef Software Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; license_file.holders = vec![Holder { holder: "Chef Software Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + 
end_line: LineNumber::ONE, }]; let mut files = vec![metadata_file, license_file]; @@ -111,8 +112,8 @@ fn manifest_declared_license_survives_into_package_and_summary() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("demo/demo.gemspec".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, @@ -141,8 +142,8 @@ fn manifest_declared_license_survives_into_package_and_summary() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("demo/demo.gemspec".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-spdx-id".to_string()), score: 100.0, matched_length: Some(1), @@ -194,8 +195,8 @@ fn summary_other_license_expressions_include_license_clues() { license_expression: "unknown-spdx".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-spdx".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 65.0, matched_length: Some(2), @@ -232,8 +233,8 @@ fn compute_summary_includes_package_other_license_detections_as_other_expression license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("project/package.json".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -279,8 +280,8 @@ fn compute_summary_uses_manifest_package_license_detections_when_file_detections license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: 
LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -315,8 +316,8 @@ fn compute_summary_prefers_file_license_detections_over_duplicate_package_data_d license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -366,8 +367,8 @@ fn compute_summary_deduplicates_duplicate_other_license_package_data_entries_per license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("project/package.json".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -417,8 +418,8 @@ fn compute_summary_deduplicates_duplicate_primary_package_data_entries_per_file( license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -478,8 +479,8 @@ fn compute_summary_uses_root_prefixed_top_level_key_files() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -562,8 +563,8 @@ fn compute_summary_prefers_package_origin_info_and_preserves_other_tallies() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), 
from_file: Some("codebase/apache-2.0.LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -591,8 +592,8 @@ fn compute_summary_prefers_package_origin_info_and_preserves_other_tallies() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("codebase/mit.LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -627,8 +628,8 @@ fn compute_summary_resolves_joined_primary_license_without_ambiguity() { readme.license_expression = Some("apache-2.0 AND (apache-2.0 OR mit)".to_string()); readme.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut apache = file("codebase/apache-2.0.LICENSE"); @@ -643,8 +644,8 @@ fn compute_summary_resolves_joined_primary_license_without_ambiguity() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("codebase/apache-2.0.LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -672,8 +673,8 @@ fn compute_summary_resolves_joined_primary_license_without_ambiguity() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("codebase/mit.LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -708,8 +709,8 @@ fn compute_summary_penalizes_conflicting_non_key_licenses_without_false_ambiguit readme.is_top_level = true; 
readme.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut mit = file("codebase/mit.LICENSE"); @@ -724,8 +725,8 @@ fn compute_summary_penalizes_conflicting_non_key_licenses_without_false_ambiguit license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("codebase/mit.LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -750,8 +751,8 @@ fn compute_summary_penalizes_conflicting_non_key_licenses_without_false_ambiguit license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("codebase/tests/test_a.py".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(10), @@ -790,13 +791,13 @@ fn compute_summary_uses_package_datafile_holders_before_global_holder_fallback() setup_py.holders = vec![ Holder { holder: "Google".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, Holder { holder: "Fraunhofer FKIE".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, ]; @@ -806,8 +807,8 @@ fn compute_summary_uses_package_datafile_holders_before_global_holder_fallback() readme.is_top_level = true; readme.holders = vec![Holder { holder: "Example Corporation".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let summary = compute_summary(&[setup_py, readme], &[package]).expect("summary exists"); @@ -835,8 +836,8 @@ fn compute_summary_prefers_package_copyright_holders_over_package_resource_holde 
nuspec.for_packages = vec![package.package_uid.clone()]; nuspec.holders = vec![Holder { holder: "Different Holder".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let summary = compute_summary(&[nuspec], &[package]).expect("summary exists"); @@ -870,8 +871,8 @@ fn compute_summary_keeps_null_other_license_expressions_when_declared_expression license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(10), @@ -905,16 +906,16 @@ fn compute_summary_keeps_null_other_holders_and_removes_declared_holder_only() { readme.is_top_level = true; readme.holders = vec![Holder { holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut authors = file("project/AUTHORS"); authors.is_community = true; authors.holders = vec![Holder { holder: "Demo Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut license = file("project/LICENSE"); @@ -932,14 +933,14 @@ fn compute_summary_keeps_holder_tallies_when_no_declared_holder_exists() { let mut source_one = file("project/src/main.c"); source_one.holders = vec![Holder { holder: "Members of the Gmerlin project".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut source_two = file("project/src/helper.c"); source_two.holders = vec![Holder { holder: "Members of the Gmerlin project".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let summary = compute_summary(&[source_one, source_two], &[]).expect("summary exists"); assert_eq!(summary.declared_holder.as_deref(), Some("")); @@ -954,15 
+955,15 @@ fn compute_summary_removes_punctuation_only_holder_variants_from_other_holders() readme.is_top_level = true; readme.holders = vec![Holder { holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut notice = file("project/NOTICE"); notice.holders = vec![Holder { holder: "Example Corp".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut license = file("project/LICENSE"); @@ -1053,8 +1054,8 @@ fn compute_summary_combines_package_licenses_when_present_datafile_is_not_key_cl license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("codebase/cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-spdx-id".to_string()), score: 100.0, matched_length: Some(1), @@ -1096,8 +1097,8 @@ fn compute_summary_serializes_empty_declared_holder_when_none_found() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("pip-22.0.4/PKG-INFO".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-spdx-id".to_string()), score: 100.0, matched_length: Some(1), @@ -1131,8 +1132,8 @@ fn compute_summary_joins_multiple_holders_from_single_top_level_license_file() { license_expression: "jetty".to_string(), license_expression_spdx: "LicenseRef-scancode-jetty".to_string(), from_file: Some("codebase/jetty.LICENSE".to_string()), - start_line: 1, - end_line: 132, + start_line: LineNumber::ONE, + end_line: LineNumber::new(132).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(996), @@ -1149,19 +1150,19 @@ fn compute_summary_joins_multiple_holders_from_single_top_level_license_file() { }]; license.copyrights = vec![Copyright { copyright: "Copyright Mort Bay and Sun 
Microsystems.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; license.holders = vec![ Holder { holder: "Mort Bay Consulting Pty. Ltd. (Australia) and others".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, Holder { holder: "Sun Microsystems".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, ]; let summary = compute_summary(&[license], &[]).expect("summary exists"); @@ -1182,8 +1183,8 @@ fn compute_score_mode_ignores_package_declared_license_without_key_file_license_ package_json.for_packages = vec![package.package_uid.clone()]; package_json.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let files = vec![package_json]; let indexes = OutputIndexes::build(&files, None, false, OutputIndexMode::Full); @@ -1205,8 +1206,8 @@ fn compute_score_mode_without_license_text_returns_zero_with_copyright_only() { package_json.for_packages = vec![package.package_uid.clone()]; package_json.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let files = vec![package_json]; let indexes = OutputIndexes::build(&files, None, false, OutputIndexMode::Full); @@ -1247,8 +1248,8 @@ fn compute_score_mode_uses_single_joined_expression_without_ambiguity() { license_expression: "mit OR apache-2.0".to_string(), license_expression_spdx: "MIT OR Apache-2.0".to_string(), from_file: Some("no_license_ambiguity/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(5), @@ -1265,8 +1266,8 @@ fn 
compute_score_mode_uses_single_joined_expression_without_ambiguity() { }]; cargo.copyrights = vec![Copyright { copyright: "Copyright The Rand Project Developers.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut apache = file("no_license_ambiguity/LICENSE-APACHE"); apache.is_legal = true; @@ -1280,8 +1281,8 @@ fn compute_score_mode_uses_single_joined_expression_without_ambiguity() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("no_license_ambiguity/LICENSE-APACHE".to_string()), - start_line: 1, - end_line: 176, + start_line: LineNumber::ONE, + end_line: LineNumber::new(176).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1410), @@ -1308,8 +1309,8 @@ fn compute_score_mode_uses_single_joined_expression_without_ambiguity() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("no_license_ambiguity/LICENSE-MIT".to_string()), - start_line: 1, - end_line: 18, + start_line: LineNumber::ONE, + end_line: LineNumber::new(18).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(161), @@ -1361,8 +1362,8 @@ fn compute_score_mode_does_not_treat_with_expression_as_covering_base_license() license_expression: "gpl-2.0 WITH classpath-exception-2.0".to_string(), license_expression_spdx: "GPL-2.0-only WITH Classpath-exception-2.0".to_string(), from_file: Some("with_exception_ambiguity/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(5), @@ -1379,8 +1380,8 @@ fn compute_score_mode_does_not_treat_with_expression_as_covering_base_license() }]; manifest.copyrights = vec![Copyright { copyright: "Copyright Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: 
LineNumber::ONE, }]; let mut gpl = file("with_exception_ambiguity/LICENSE-GPL"); @@ -1395,8 +1396,8 @@ fn compute_score_mode_does_not_treat_with_expression_as_covering_base_license() license_expression: "gpl-2.0".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("with_exception_ambiguity/LICENSE-GPL".to_string()), - start_line: 1, - end_line: 176, + start_line: LineNumber::ONE, + end_line: LineNumber::new(176).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1410), @@ -1442,8 +1443,8 @@ fn compute_score_mode_scores_nested_manifest_key_file_without_copyright() { from_file: Some( "jar/META-INF/maven/org.jboss.logging/jboss-logging/pom.xml".to_string(), ), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(16), @@ -1470,8 +1471,8 @@ fn compute_score_mode_scores_nested_manifest_key_file_without_copyright() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("jar/META-INF/LICENSE.txt".to_string()), - start_line: 1, - end_line: 176, + start_line: LineNumber::ONE, + end_line: LineNumber::new(176).unwrap(), matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1410), diff --git a/src/post_processing/tallies_test.rs b/src/post_processing/tallies_test.rs index fc2244331..27ce435e6 100644 --- a/src/post_processing/tallies_test.rs +++ b/src/post_processing/tallies_test.rs @@ -1,6 +1,8 @@ use super::test_utils::{dir, file}; use super::*; -use crate::models::{Author, Copyright, Holder, Match, PackageData, PackageType, TallyEntry}; +use crate::models::{ + Author, Copyright, Holder, LineNumber, Match, PackageData, PackageType, TallyEntry, +}; #[test] fn compute_tallies_counts_file_findings_and_missing_values() { @@ -15,8 +17,8 @@ fn compute_tallies_counts_file_findings_and_missing_values() { license_expression: 
"mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/lib.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, @@ -33,18 +35,18 @@ fn compute_tallies_counts_file_findings_and_missing_values() { }]; mit_file.copyrights = vec![Copyright { copyright: "Copyright (c) Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; mit_file.holders = vec![Holder { holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; mit_file.authors = vec![Author { author: "Alice".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut dual_license_file = file("project/src/main.c"); @@ -59,8 +61,8 @@ fn compute_tallies_counts_file_findings_and_missing_values() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("project/src/main.c".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, @@ -82,8 +84,8 @@ fn compute_tallies_counts_file_findings_and_missing_values() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/src/main.c".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: None, score: 100.0, matched_length: None, @@ -101,18 +103,18 @@ fn compute_tallies_counts_file_findings_and_missing_values() { ]; dual_license_file.copyrights = vec![Copyright { copyright: "Copyright (c) Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; dual_license_file.holders = vec![Holder { 
holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; dual_license_file.authors = vec![Author { author: "Bob".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let empty_file = file("project/README.md"); @@ -150,13 +152,13 @@ fn compute_key_file_tallies_only_counts_key_files_and_drops_missing_values() { key_license.license_expression = Some("apache-2.0".to_string()); key_license.copyrights = vec![Copyright { copyright: "Copyright (c) Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; key_license.holders = vec![Holder { holder: "Example Corp.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut key_readme = file("project/README.md"); @@ -164,8 +166,8 @@ fn compute_key_file_tallies_only_counts_key_files_and_drops_missing_values() { key_readme.programming_language = Some("Markdown".to_string()); key_readme.authors = vec![Author { author: "Alice".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let mut non_key_source = file("project/src/lib.rs"); @@ -203,8 +205,8 @@ fn compute_tallies_include_package_other_license_detections() { license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("project/package.json".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -239,8 +241,8 @@ fn compute_tallies_include_license_clues_in_detected_license_expression() { license_expression: "unknown-spdx".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-spdx".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 2, - 
end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 65.0, matched_length: Some(2), @@ -272,8 +274,8 @@ fn compute_key_file_tallies_include_license_clues() { license_expression: "unknown-spdx".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-spdx".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 65.0, matched_length: Some(2), @@ -310,8 +312,8 @@ fn compute_key_file_tallies_include_package_other_license_detections() { license_expression: "gpl-2.0-only".to_string(), license_expression_spdx: "GPL-2.0-only".to_string(), from_file: Some("project/package.json".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -349,8 +351,8 @@ fn compute_tallies_include_manifest_package_license_detections() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -390,8 +392,8 @@ fn compute_key_file_tallies_include_manifest_package_license_detections() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -426,8 +428,8 @@ fn compute_tallies_do_not_double_count_duplicate_file_and_package_detections() { license_expression: "mit".to_string(), 
license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -474,8 +476,8 @@ fn compute_tallies_deduplicate_duplicate_package_data_entries_per_file() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/Cargo.toml".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(1), @@ -536,18 +538,18 @@ fn compute_tallies_ignores_legal_file_copyright_holder_and_author_noise() { legal.is_legal = true; legal.copyrights = vec![Copyright { copyright: "copyright and related or neighboring rights".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; legal.holders = vec![Holder { holder: "Related Rights".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; legal.authors = vec![Author { author: "be liable for".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let tallies = compute_tallies(&[legal]).expect("tallies exist"); @@ -583,13 +585,13 @@ fn compute_key_file_tallies_excludes_legal_file_copyrights_holders_and_languages legal.programming_language = Some("Text".to_string()); legal.copyrights = vec![Copyright { copyright: "copyright and related or neighboring rights".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; legal.holders = vec![Holder { holder: "Related Rights".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; assert!(compute_key_file_tallies(&[legal]).is_none()); @@ -600,14 
+602,14 @@ fn compute_tallies_normalizes_jboss_style_copyright_and_holder_values() { let mut source = file("project/src/lib.java"); source.copyrights = vec![Copyright { copyright: "Copyright 2005, JBoss Inc., and individual contributors as indicated by the @authors tag".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; source.holders = vec![Holder { holder: "JBoss Inc., and individual contributors as indicated by the @authors tag" .to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let tallies = compute_tallies(&[source]).expect("tallies exist"); @@ -627,8 +629,8 @@ fn compute_tallies_strips_leading_years_from_copyright_tallies() { let mut source = file("project/src/zlib.h"); source.copyrights = vec![Copyright { copyright: "Copyright (c) 1995-2013 Jean-loup Gailly and Mark Adler".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let tallies = compute_tallies(&[source]).expect("tallies exist"); @@ -644,8 +646,8 @@ fn compute_tallies_filters_lowercase_author_noise() { let mut source = file("project/src/lib.java"); source.authors = vec![Author { author: "be liable for".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; let tallies = compute_tallies(&[source]).expect("tallies exist"); @@ -673,8 +675,8 @@ fn compute_detailed_tallies_assigns_file_and_directory_rollups() { files[3].programming_language = Some("Rust".to_string()); files[3].authors = vec![Author { author: "Alice".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; files[4].programming_language = Some("Markdown".to_string()); diff --git a/src/scan_result_shaping/core_test.rs b/src/scan_result_shaping/core_test.rs index ccbb31f10..e717fb650 100644 --- a/src/scan_result_shaping/core_test.rs +++ 
b/src/scan_result_shaping/core_test.rs @@ -1,7 +1,7 @@ use super::*; use crate::models::{ - Author, Copyright, DatasourceId, Dependency, FileReference, OutputEmail, OutputURL, Package, - PackageData, TopLevelDependency, + Author, Copyright, DatasourceId, Dependency, FileReference, LineNumber, OutputEmail, OutputURL, + Package, PackageData, TopLevelDependency, }; use crate::scan_result_shaping::test_fixtures::{dir, file}; use regex::Regex; @@ -50,8 +50,8 @@ fn only_findings_keeps_file_with_findings_and_parent_dirs() { let mut files = vec![dir("project"), file("project/a.txt"), file("project/b.txt")]; files[2].copyrights = vec![Copyright { copyright: "Copyright Example".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; apply_only_findings_filter(&mut files); @@ -68,37 +68,37 @@ fn filter_redundant_clues_dedupes_exact_duplicates() { files[0].authors = vec![ Author { author: "Jane".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, Author { author: "Jane".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, ]; files[0].emails = vec![ OutputEmail { email: "a@example.com".to_string(), - start_line: 3, - end_line: 3, + start_line: LineNumber::new(3).unwrap(), + end_line: LineNumber::new(3).unwrap(), }, OutputEmail { email: "a@example.com".to_string(), - start_line: 3, - end_line: 3, + start_line: LineNumber::new(3).unwrap(), + end_line: LineNumber::new(3).unwrap(), }, ]; files[0].urls = vec![ OutputURL { url: "https://example.com".to_string(), - start_line: 4, - end_line: 4, + start_line: LineNumber::new(4).unwrap(), + end_line: LineNumber::new(4).unwrap(), }, OutputURL { url: "https://example.com".to_string(), - start_line: 4, - end_line: 4, + start_line: LineNumber::new(4).unwrap(), + end_line: LineNumber::new(4).unwrap(), }, ]; @@ -115,25 +115,25 @@ fn 
filter_redundant_clues_keeps_distinct_line_ranges_and_dedupes_copyrights_and_ files[0].copyrights = vec![ Copyright { copyright: "Copyright Example".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, Copyright { copyright: "Copyright Example".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }, ]; files[0].holders = vec![ crate::models::Holder { holder: "Example Corp".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }, crate::models::Holder { holder: "Example Corp".to_string(), - start_line: 3, - end_line: 3, + start_line: LineNumber::new(3).unwrap(), + end_line: LineNumber::new(3).unwrap(), }, ]; @@ -153,8 +153,8 @@ fn filter_redundant_clues_with_rules_suppresses_ignorable_rule_and_cross_clues() license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(42), @@ -171,28 +171,28 @@ fn filter_redundant_clues_with_rules_suppresses_ignorable_rule_and_cross_clues() }]; files[0].copyrights = vec![Copyright { copyright: "Copyright Example Corp".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].holders = vec![crate::models::Holder { holder: "Example Corp".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].authors = vec![Author { author: "Jane Example".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].emails = vec![OutputEmail { email: "legal@example.com".to_string(), - start_line: 2, - end_line: 2, + 
start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].urls = vec![OutputURL { url: "https://example.com/".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; let clue_rule_lookup = HashMap::from([( @@ -225,8 +225,8 @@ fn filter_redundant_clues_with_rules_keeps_non_exact_ignorable_values() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(42), @@ -243,8 +243,8 @@ fn filter_redundant_clues_with_rules_keeps_non_exact_ignorable_values() { }]; files[0].holders = vec![crate::models::Holder { holder: "Example Corp".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; let clue_rule_lookup = HashMap::from([( @@ -266,28 +266,28 @@ fn filter_redundant_clues_suppresses_cross_clues_without_license_rules() { let mut files = vec![file("project/a.txt")]; files[0].copyrights = vec![Copyright { copyright: "Copyright Example https://example.com".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].holders = vec![crate::models::Holder { holder: "Jane Example".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].authors = vec![Author { author: "Jane Example".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].emails = vec![OutputEmail { email: "legal@example.com".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; 
files[0].urls = vec![OutputURL { url: "https://example.com/".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; filter_redundant_clues(&mut files); @@ -310,8 +310,8 @@ fn filter_redundant_clues_with_rules_uses_package_origin_detections() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/package.json".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("parser-declared-license".to_string()), score: 100.0, matched_length: Some(42), @@ -330,13 +330,13 @@ fn filter_redundant_clues_with_rules_uses_package_origin_detections() { }]; files[0].emails = vec![OutputEmail { email: "legal@example.com".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; files[0].urls = vec![OutputURL { url: "https://example.com/".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; let clue_rule_lookup = HashMap::from([( @@ -364,8 +364,8 @@ fn filter_redundant_clues_with_rules_ignores_low_coverage_matches() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: None, - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(42), @@ -382,8 +382,8 @@ fn filter_redundant_clues_with_rules_ignores_low_coverage_matches() { }]; files[0].emails = vec![OutputEmail { email: "legal@example.com".to_string(), - start_line: 2, - end_line: 2, + start_line: LineNumber::new(2).unwrap(), + end_line: LineNumber::new(2).unwrap(), }]; let clue_rule_lookup = HashMap::from([( @@ -411,14 +411,14 @@ fn ignore_resource_filter_removes_matching_files_and_preserves_needed_dirs() { ]; 
files[3].authors = vec![Author { author: "Jane Doe".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; files[3].license_expression = Some("mit".to_string()); files[4].holders = vec![crate::models::Holder { holder: "Example Corp".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; files[4].scan_errors = vec!["should still be dropped".to_string()]; @@ -504,8 +504,8 @@ fn normalize_paths_updates_license_match_from_file_paths_too() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(12), @@ -524,8 +524,8 @@ fn normalize_paths_updates_license_match_from_file_paths_too() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(42), @@ -567,8 +567,8 @@ fn normalize_paths_updates_package_level_license_match_from_file_paths_too() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("project/LICENSE".to_string()), - start_line: 1, - end_line: 5, + start_line: LineNumber::ONE, + end_line: LineNumber::new(5).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(42), @@ -590,8 +590,8 @@ fn normalize_paths_updates_package_level_license_match_from_file_paths_too() { license_expression: "apache-2.0".to_string(), license_expression_spdx: "Apache-2.0".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 1, - end_line: 3, + start_line: LineNumber::ONE, + end_line: 
LineNumber::new(3).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(30), @@ -665,8 +665,8 @@ fn only_findings_keeps_clue_only_files() { license_expression: "unknown-license-reference".to_string(), license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(), from_file: Some("project/NOTICE".to_string()), - start_line: 1, - end_line: 2, + start_line: LineNumber::ONE, + end_line: LineNumber::new(2).unwrap(), matcher: Some("2-aho".to_string()), score: 100.0, matched_length: Some(19), diff --git a/src/scan_result_shaping/json_input_test.rs b/src/scan_result_shaping/json_input_test.rs index dbee14e94..5d518f46d 100644 --- a/src/scan_result_shaping/json_input_test.rs +++ b/src/scan_result_shaping/json_input_test.rs @@ -1,4 +1,5 @@ use super::*; +use crate::models::LineNumber; use crate::scan_result_shaping::test_fixtures::json_file; use serde_json::json; use std::fs; @@ -88,8 +89,8 @@ fn normalize_loaded_json_scan_applies_strip_root_per_loaded_input() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("archive/root/src/main.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, @@ -145,8 +146,8 @@ fn normalize_loaded_json_scan_trims_full_root_display_without_absolutizing() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("/tmp/archive/root/src/main.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: None, score: 100.0, matched_length: None, diff --git a/src/scan_result_shaping/mod.rs b/src/scan_result_shaping/mod.rs index d9698ec02..0cbb5450a 100644 --- a/src/scan_result_shaping/mod.rs +++ b/src/scan_result_shaping/mod.rs @@ -12,7 +12,7 @@ use std::path::{Path, PathBuf}; use crate::license_detection::LicenseDetectionEngine; use 
crate::license_detection::index::LicenseIndex; -use crate::models::{FileInfo, Match, Package, TopLevelDependency}; +use crate::models::{FileInfo, LineNumber, Match, Package, TopLevelDependency}; use anyhow::Result; pub(crate) use json_input::load_and_merge_json_inputs; @@ -120,8 +120,8 @@ pub(crate) type ClueRuleLookup = HashMap; #[derive(Debug, Clone)] struct IgnorableSpan { - start_line: usize, - end_line: usize, + start_line: LineNumber, + end_line: LineNumber, values: Vec, allow_substring: bool, } @@ -396,8 +396,8 @@ fn collect_rule_ignorables( fn push_ignorable_values( target: &mut Vec, - start_line: usize, - end_line: usize, + start_line: LineNumber, + end_line: LineNumber, values: &[String], trim_slashes: bool, ) { @@ -419,8 +419,8 @@ fn push_ignorable_values( fn matches_ignorable( ignorables: &[IgnorableSpan], - start_line: usize, - end_line: usize, + start_line: LineNumber, + end_line: LineNumber, value: &str, trim_slashes: bool, ) -> bool { diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 545f945c9..4bbb04e6d 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -245,7 +245,7 @@ mod tests { let emails: Vec<(&str, usize)> = scanned .emails .iter() - .map(|email| (email.email.as_str(), email.start_line)) + .map(|email| (email.email.as_str(), email.start_line.get())) .collect(); assert_eq!(emails.len(), 4, "emails: {emails:#?}"); @@ -426,7 +426,13 @@ mod tests { let authors: Vec<(&str, usize, usize)> = scanned .authors .iter() - .map(|author| (author.author.as_str(), author.start_line, author.end_line)) + .map(|author| { + ( + author.author.as_str(), + author.start_line.get(), + author.end_line.get(), + ) + }) .collect(); assert_eq!( diff --git a/src/scanner/process.rs b/src/scanner/process.rs index b73a239c3..f4ae20dd1 100644 --- a/src/scanner/process.rs +++ b/src/scanner/process.rs @@ -26,7 +26,7 @@ use crate::license_detection::models::LicenseMatch as InternalLicenseMatch; use crate::license_detection::query::Query; use crate::models::{ 
Author, Copyright, DatasourceId, FileInfo, FileInfoBuilder, FileType, Holder, LicenseDetection, - Match, OutputEmail, OutputURL, Sha256Digest, + LineNumber, Match, OutputEmail, OutputURL, Sha256Digest, }; use crate::parsers::utils::split_name_email; use crate::progress::ScanProgress; @@ -687,7 +687,12 @@ fn prune_binary_string_detections( (kept_copyrights, kept_holders, kept_authors) } -fn ranges_overlap(a_start: usize, a_end: usize, b_start: usize, b_end: usize) -> bool { +fn ranges_overlap( + a_start: LineNumber, + a_end: LineNumber, + b_start: LineNumber, + b_end: LineNumber, +) -> bool { a_start <= b_end && b_start <= a_end } @@ -739,8 +744,8 @@ fn extract_binary_string_author_supplements(text_content: &str) -> Vec LicenseMatch { LicenseMatch { rid: 0, license_expression: "mit".to_string(), license_expression_spdx: Some("MIT".to_string()), from_file: None, - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, start_token: 0, end_token: 1, matcher: MatcherKind::Hash, @@ -1659,8 +1666,8 @@ mod tests { detection.matches[0].license_expression_spdx = Some("FSFAP".to_string()); detection.matches[0].rule_identifier = "fsf-ap.LICENSE".to_string(); detection.matches[0].matched_text = None; - detection.matches[0].start_line = 1; - detection.matches[0].end_line = 3; + detection.matches[0].start_line = LineNumber::ONE; + detection.matches[0].end_line = LineNumber::new(3).unwrap(); detection.matches[0].start_token = 0; detection.matches[0].end_token = query.tokens.len(); detection.matches[0].coordinates = diff --git a/tests/output_format_golden.rs b/tests/output_format_golden.rs index 63905d9d7..b2e53cf42 100644 --- a/tests/output_format_golden.rs +++ b/tests/output_format_golden.rs @@ -1,7 +1,7 @@ use provenant::models::{ Copyright, DatasourceId, ExtraData, FacetTallies, FileInfo, FileType, Header, Holder, - Md5Digest, Output, Package, PackageData, PackageType, Party, ResolvedPackage, Sha1Digest, - SystemEnvironment, Tallies, 
TallyEntry, TopLevelDependency, + LineNumber, Md5Digest, Output, Package, PackageData, PackageType, Party, ResolvedPackage, + Sha1Digest, SystemEnvironment, Tallies, TallyEntry, TopLevelDependency, }; use provenant::{OutputFormat, OutputWriteConfig, OutputWriter, writer_for_format}; use regex::Regex; @@ -107,8 +107,8 @@ fn test_debian_output_matches_local_expected_fixture() { ); file.holders = vec![Holder { holder: "Example Org".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }]; file.license_expression = Some("MIT".to_string()); file.license_detections = vec![provenant::models::LicenseDetection { @@ -118,8 +118,8 @@ fn test_debian_output_matches_local_expected_fixture() { license_expression: "mit".to_string(), license_expression_spdx: "MIT".to_string(), from_file: Some("scan/src/main.rs".to_string()), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, matcher: Some("1-hash".to_string()), score: 100.0, matched_length: Some(1), @@ -1600,13 +1600,13 @@ fn sample_html_simple_output() -> Output { vec![], vec![Copyright { copyright: "Copyright (c) 2000 ACME, Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![Holder { holder: "ACME, Inc.".to_string(), - start_line: 1, - end_line: 1, + start_line: LineNumber::ONE, + end_line: LineNumber::ONE, }], vec![], vec![], diff --git a/tests/scanner_copyright_credits.rs b/tests/scanner_copyright_credits.rs index 7609ecfed..2f3d1d7f9 100644 --- a/tests/scanner_copyright_credits.rs +++ b/tests/scanner_copyright_credits.rs @@ -68,8 +68,8 @@ fn scanner_matches_structured_credits_fixture() { .into_iter() .map(|author| ExpectedAuthor { author: author.author, - start_line: author.start_line, - end_line: author.end_line, + start_line: author.start_line.get(), + end_line: author.end_line.get(), }) .collect(); diff --git a/tests/scanner_integration.rs 
b/tests/scanner_integration.rs index 9f1dfd276..7c3e992a4 100644 --- a/tests/scanner_integration.rs +++ b/tests/scanner_integration.rs @@ -1,7 +1,7 @@ use glob::Pattern; use once_cell::sync::Lazy; use provenant::license_detection::LicenseDetectionEngine; -use provenant::models::PackageType; +use provenant::models::{LineNumber, PackageType}; use provenant::parsers::list_parser_types; use provenant::progress::{ProgressMode, ScanProgress}; use provenant::scanner::LicenseScanOptions; @@ -577,13 +577,13 @@ fn test_scanner_detects_emails_and_urls_when_enabled() { assert_eq!(file.emails.len(), 1); assert_eq!(file.emails[0].email, "support@many.org"); - assert_eq!(file.emails[0].start_line, 1); - assert_eq!(file.emails[0].end_line, 1); + assert_eq!(file.emails[0].start_line, LineNumber::ONE); + assert_eq!(file.emails[0].end_line, LineNumber::ONE); assert_eq!(file.urls.len(), 1); assert_eq!(file.urls[0].url, "http://www.acme.dev/docs"); - assert_eq!(file.urls[0].start_line, 2); - assert_eq!(file.urls[0].end_line, 2); + assert_eq!(file.urls[0].start_line, LineNumber::new(2).unwrap()); + assert_eq!(file.urls[0].end_line, LineNumber::new(2).unwrap()); } #[test] @@ -626,13 +626,13 @@ fn test_scanner_detects_copyrights_in_latin1_text() { file.copyrights[0].copyright, "Copyright 2024 François Müller" ); - assert_eq!(file.copyrights[0].start_line, 1); - assert_eq!(file.copyrights[0].end_line, 1); + assert_eq!(file.copyrights[0].start_line, LineNumber::ONE); + assert_eq!(file.copyrights[0].end_line, LineNumber::ONE); assert_eq!(file.holders.len(), 1); assert_eq!(file.holders[0].holder, "François Müller"); - assert_eq!(file.holders[0].start_line, 1); - assert_eq!(file.holders[0].end_line, 1); + assert_eq!(file.holders[0].start_line, LineNumber::ONE); + assert_eq!(file.holders[0].end_line, LineNumber::ONE); } #[test]