Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion backend/bills-indexer/internal/adapter/legisinfo/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ func (f *Fetcher) fetchDetail(ctx context.Context, session domain.Session, numbe

func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, number string, pubs []publicationJSON) []domain.BillVersion {
versions := make([]domain.BillVersion, 0, len(pubs))
var firstXMLURL, firstPDFURL string
for i, pub := range pubs {
stage := firstNonEmpty(pub.PublicationTypeNameEn, pub.PublicationTypeName)
slug := publicationSlug(stage)
Expand All @@ -171,7 +172,21 @@ func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, nu
Source: "LEGISinfo publication",
SortOrder: i + 1,
}
xmlURL, pdfURL := f.fetchDocumentLinks(ctx, htmlURL)

var xmlURL, pdfURL string
if firstXMLURL == "" {
xmlURL, pdfURL = f.fetchDocumentLinks(ctx, htmlURL)
if xmlURL != "" {
firstXMLURL = xmlURL
}
if pdfURL != "" {
firstPDFURL = pdfURL
}
} else {
xmlURL = constructXMLURL(firstXMLURL, i+1)
pdfURL = constructPDFURL(firstPDFURL, i+1)
}

version.XMLURL = xmlURL
version.PDFURL = pdfURL

Expand All @@ -194,6 +209,25 @@ func (f *Fetcher) enrichVersions(ctx context.Context, session domain.Session, nu
return versions
}

// constructXMLURL rewrites firstURL so that its first "_1/" segment becomes
// "_<sortOrder>/". An empty firstURL yields an empty string, and a firstURL
// that contains no "_1/" segment is returned unchanged.
func constructXMLURL(firstURL string, sortOrder int) string {
	const marker = "_1/"

	at := strings.Index(firstURL, marker)
	if at < 0 {
		// Covers both the empty input and URLs without a version segment.
		return firstURL
	}

	head := firstURL[:at]
	tail := firstURL[at+len(marker):]
	return head + fmt.Sprintf("_%d/", sortOrder) + tail
}

// constructPDFURL rewrites firstURL for the publication numbered sortOrder.
// The first "_1/" segment becomes "_<sortOrder>/", and the first "_1.PDF",
// "_1.pdf" and "_1.Pdf" occurrences are renumbered the same way. An empty
// firstURL yields an empty string; markers that are absent are left alone.
func constructPDFURL(firstURL string, sortOrder int) string {
	if len(firstURL) == 0 {
		return ""
	}

	numbered := fmt.Sprintf("_%d", sortOrder)
	rewritten := firstURL
	// Applied in this order, each rewriting only the first occurrence found.
	for _, tail := range []string{"/", ".PDF", ".pdf", ".Pdf"} {
		rewritten = strings.Replace(rewritten, "_1"+tail, numbered+tail, 1)
	}
	return rewritten
}


func (f *Fetcher) fetchDocumentLinks(ctx context.Context, pageURL string) (string, string) {
body, err := f.getBytes(ctx, pageURL, "text/html")
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,47 @@ func TestFetcherBuildsRelationalBillRecordsFromLegisInfoExports(t *testing.T) {
t.Fatalf("committee meetings = %#v", stage.Meetings)
}
}

// TestConstructURL checks that constructXMLURL and constructPDFURL renumber
// the version markers of a first-version URL, return an empty string for an
// empty input, and (for XML) leave a URL without a marker untouched.
func TestConstructURL(t *testing.T) {
	t.Run("XML construction", func(t *testing.T) {
		const (
			input    = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_E.xml"
			expected = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_E.xml"
		)
		if actual := constructXMLURL(input, 2); actual != expected {
			t.Errorf("constructXMLURL = %q, want %q", actual, expected)
		}

		// An empty input must produce an empty result.
		if actual := constructXMLURL("", 2); actual != "" {
			t.Error("constructXMLURL with empty string should return empty string")
		}

		// A URL without a "_1/" segment must come back as given.
		const unrelated = "https://www.parl.ca/other/url.xml"
		if actual := constructXMLURL(unrelated, 2); actual != unrelated {
			t.Errorf("constructXMLURL without matching prefix should return input unchanged")
		}
	})

	t.Run("PDF construction", func(t *testing.T) {
		// Upper-case extension.
		const (
			upperInput    = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_1.PDF"
			upperExpected = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_2.PDF"
		)
		if actual := constructPDFURL(upperInput, 2); actual != upperExpected {
			t.Errorf("constructPDFURL = %q, want %q", actual, upperExpected)
		}

		// Lower-case extension.
		const (
			lowerInput    = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_1/C-11_1.pdf"
			lowerExpected = "https://www.parl.ca/Content/Bills/451/Government/C-11/C-11_2/C-11_2.pdf"
		)
		if actual := constructPDFURL(lowerInput, 2); actual != lowerExpected {
			t.Errorf("constructPDFURL lower = %q, want %q", actual, lowerExpected)
		}

		// An empty input must produce an empty result.
		if actual := constructPDFURL("", 2); actual != "" {
			t.Error("constructPDFURL with empty string should return empty string")
		}
	})
}

Loading