From 5774091741c39a4a965960d37e613fd77a661985 Mon Sep 17 00:00:00 2001
From: riddim-developer-bot
Date: Sun, 14 Jun 2026 14:43:39 -0400
Subject: [PATCH] [EPAC-2298]: Persist clause-level bill diff data for backend route

---
Review notes (text in this section is not part of the commit message):

- enrichVersions now seeds the link template from the first publication only.
  Previously the first *successful* fetch was cached, so a failed fetch for
  publication 1 made a later publication's "_N/" URL the template and every
  following version silently reused it.
- Any link that cannot be derived (no template, or a template without the
  "_1/" marker) is read from the publication page, so a missing PDF link on
  publication 1 no longer removes the PDF link from every later version.
- constructXMLURL/constructPDFURL return "" instead of the unmodified input
  when the "_1/" marker is absent; constructPDFURL shares the directory
  rewrite with constructXMLURL and matches "_1.pdf" in any letter case.
- Hunk headers and the diffstat were recomputed for this revision. The commit
  id and index blob hashes still refer to the original revision, and
  context-line whitespace was restored from a flattened copy, so
  `git apply --ignore-whitespace` may be needed.

 .../internal/adapter/legisinfo/fetcher.go     | 56 ++++++++++++++++++-
 .../adapter/legisinfo/fetcher_test.go         | 51 +++++++++++++++++
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/backend/bills-indexer/internal/adapter/legisinfo/fetcher.go b/backend/bills-indexer/internal/adapter/legisinfo/fetcher.go
index 23e996fa..e712806e 100644
--- a/backend/bills-indexer/internal/adapter/legisinfo/fetcher.go
+++ b/backend/bills-indexer/internal/adapter/legisinfo/fetcher.go
@@ -158,6 +158,7 @@ func (f *Fetcher) fetchDetail(ctx context.Context, session domain.Session, numbe
 
 func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, number string, pubs []publicationJSON) []domain.BillVersion {
 	versions := make([]domain.BillVersion, 0, len(pubs))
+	var firstXMLURL, firstPDFURL string
 	for i, pub := range pubs {
 		stage := firstNonEmpty(pub.PublicationTypeNameEn, pub.PublicationTypeName)
 		slug := publicationSlug(stage)
@@ -171,7 +172,29 @@ func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, nu
 			Source:    "LEGISinfo publication",
 			SortOrder: i + 1,
 		}
-		xmlURL, pdfURL := f.fetchDocumentLinks(ctx, htmlURL)
+
+		// The construct helpers rewrite version 1's "_1" markers, so only the
+		// first publication may seed the template; any link that cannot be
+		// derived is read from the publication page instead.
+		// NOTE(review): assumes publication i+1 is published under "_<i+1>".
+		var xmlURL, pdfURL string
+		if i > 0 {
+			xmlURL = constructXMLURL(firstXMLURL, i+1)
+			pdfURL = constructPDFURL(firstPDFURL, i+1)
+		}
+		if xmlURL == "" || pdfURL == "" {
+			fetchedXML, fetchedPDF := f.fetchDocumentLinks(ctx, htmlURL)
+			if xmlURL == "" {
+				xmlURL = fetchedXML
+			}
+			if pdfURL == "" {
+				pdfURL = fetchedPDF
+			}
+		}
+		if i == 0 {
+			firstXMLURL, firstPDFURL = xmlURL, pdfURL
+		}
+
 		version.XMLURL = xmlURL
 		version.PDFURL = pdfURL
 
@@ -194,6 +217,37 @@ func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, nu
 	return versions
 }
 
+// constructXMLURL derives the XML URL of publication number sortOrder from
+// firstURL, the XML URL of publication 1, by rewriting the first "_1/" path
+// marker to "_<sortOrder>/". It returns "" when firstURL is empty or carries
+// no such marker, so the caller can fall back to fetching the real link
+// instead of storing publication 1's URL against another version.
+func constructXMLURL(firstURL string, sortOrder int) string {
+	const marker = "_1/"
+	if !strings.Contains(firstURL, marker) {
+		return ""
+	}
+	return strings.Replace(firstURL, marker, fmt.Sprintf("_%d/", sortOrder), 1)
+}
+
+// constructPDFURL derives the PDF URL of publication number sortOrder from
+// firstURL, the PDF URL of publication 1. The "_1/" path marker is rewritten
+// exactly as in constructXMLURL (including the "" result when it is absent),
+// and the first "_1.pdf" file-name marker, matched in any letter case, is
+// rewritten to "_<sortOrder>" with the extension's original case preserved.
+func constructPDFURL(firstURL string, sortOrder int) string {
+	res := constructXMLURL(firstURL, sortOrder)
+	const marker = "_1.pdf"
+	for at := 0; at+len(marker) <= len(res); at++ {
+		end := at + len(marker)
+		if strings.EqualFold(res[at:end], marker) {
+			ext := res[end-len(".pdf") : end]
+			return res[:at] + fmt.Sprintf("_%d", sortOrder) + ext + res[end:]
+		}
+	}
+	return res
+}
+
 func (f *Fetcher) fetchDocumentLinks(ctx context.Context, pageURL string) (string, string) {
 	body, err := f.getBytes(ctx, pageURL, "text/html")
 	if err != nil {
diff --git a/backend/bills-indexer/internal/adapter/legisinfo/fetcher_test.go b/backend/bills-indexer/internal/adapter/legisinfo/fetcher_test.go
index 40fbc6de..e77e3bfe 100644
--- a/backend/bills-indexer/internal/adapter/legisinfo/fetcher_test.go
+++ b/backend/bills-indexer/internal/adapter/legisinfo/fetcher_test.go
@@ -160,3 +160,54 @@ func TestFetcherBuildsRelationalBillRecordsFromLegisInfoExports(t *testing.T) {
 		t.Fatalf("committee meetings = %#v", stage.Meetings)
 	}
 }
+
+func TestConstructURL(t *testing.T) {
+	t.Run("XML construction", func(t *testing.T) {
+		first := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_E.xml"
+		want := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_E.xml"
+		got := constructXMLURL(first, 2)
+		if got != want {
+			t.Errorf("constructXMLURL = %q, want %q", got, want)
+		}
+
+		// Empty URL returns empty
+		if constructXMLURL("", 2) != "" {
+			t.Error("constructXMLURL with empty string should return empty string")
+		}
+
+		// Without a "_1/" marker nothing can be derived, so the result is
+		// empty rather than publication 1's URL.
+		noMatch := "https://www.parl.ca/other/url.xml"
+		if gotNoMatch := constructXMLURL(noMatch, 2); gotNoMatch != "" {
+			t.Errorf("constructXMLURL without marker = %q, want empty string", gotNoMatch)
+		}
+	})
+
+	t.Run("PDF construction", func(t *testing.T) {
+		first := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_1.PDF"
+		want := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_2.PDF"
+		got := constructPDFURL(first, 2)
+		if got != want {
+			t.Errorf("constructPDFURL = %q, want %q", got, want)
+		}
+
+		firstLower := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_1.pdf"
+		wantLower := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_2.pdf"
+		gotLower := constructPDFURL(firstLower, 2)
+		if gotLower != wantLower {
+			t.Errorf("constructPDFURL lower = %q, want %q", gotLower, wantLower)
+		}
+
+		// The extension is matched case-insensitively and keeps its case.
+		firstMixed := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_1.PdF"
+		wantMixed := "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_3/C-11_3.PdF"
+		if gotMixed := constructPDFURL(firstMixed, 3); gotMixed != wantMixed {
+			t.Errorf("constructPDFURL mixed = %q, want %q", gotMixed, wantMixed)
+		}
+
+		// Empty URL returns empty
+		if constructPDFURL("", 2) != "" {
+			t.Error("constructPDFURL with empty string should return empty string")
+		}
+	})
+}