diff --git a/.github/workflows/pr-build.yml b/.github/workflows/pr-build.yml index 582dd0e5..246b3ed6 100644 --- a/.github/workflows/pr-build.yml +++ b/.github/workflows/pr-build.yml @@ -102,6 +102,9 @@ jobs: - name: Run bills-indexer tests working-directory: backend/bills-indexer run: go test ./... + - name: Run bills tests + working-directory: backend/bills + run: go test ./... - name: Run members-indexer tests working-directory: backend/members-indexer run: go test ./... diff --git a/backend/bills-indexer/main.go b/backend/bills-indexer/main.go index e81ce70d..45b3d229 100644 --- a/backend/bills-indexer/main.go +++ b/backend/bills-indexer/main.go @@ -39,6 +39,9 @@ func main() { } func run(ctx context.Context) error { + if fixturePath := strings.TrimSpace(os.Getenv("BILLS_FIXTURE_BATCH")); fixturePath != "" { + return runFromFixture(ctx, fixturePath) + } session, err := sessionFromEnv() if err != nil { return err @@ -66,7 +69,7 @@ func run(ctx context.Context) error { ) writer := sqliteadapter.NewWriter(sqliteadapter.WithLogger(logger)) store := s3adapter.NewStore(awss3.NewFromConfig(awsCfg), bucket, prefix, s3adapter.WithLogger(logger)) - + dbPath := firstEnvDefault(defaultDBPath, "DB_PATH", "BILLS_DB_PATH") return runPipeline(ctx, session, prefix, bucket, source, writer, store, store, dbPath) } @@ -109,6 +112,37 @@ func runPipeline( return nil } +// runFromFixture builds the SQLite artifact from a local JSON batch instead of +// fetching from LEGISinfo and uploading to S3. The bills serving module's +// producer-to-consumer seam test (EPAC-2304) sets BILLS_FIXTURE_BATCH and +// DB_PATH to drive the real writer, then reads the on-disk artifact back with +// the serving repository so SQLite schema drift fails at build time. The +// deployed pipeline never sets BILLS_FIXTURE_BATCH. 
+func runFromFixture(ctx context.Context, fixturePath string) error { + data, err := os.ReadFile(fixturePath) + if err != nil { + return fmt.Errorf("read fixture batch: %w", err) + } + var batch domain.Batch + if err := json.Unmarshal(data, &batch); err != nil { + return fmt.Errorf("decode fixture batch: %w", err) + } + dbPath := firstEnvDefault(defaultDBPath, "DB_PATH", "BILLS_DB_PATH") + writer := sqliteadapter.NewWriter(sqliteadapter.WithLogger(func(payload map[string]any) { logJSON(payload) })) + stats, err := writer.Write(ctx, dbPath, batch) + if err != nil { + return fmt.Errorf("write fixture artifact: %w", err) + } + logJSON(map[string]any{ + "pipeline": "bills-indexer", + "event": "fixture_artifact_written", + "db_path": dbPath, + "bill_count": stats.BillCount, + "table_counts": stats.TableCounts, + }) + return nil +} + func sessionFromEnv() (domain.Session, error) { parliament, err := positiveIntFromEnv("PARLIAMENT_NUMBER", defaultParliamentNumber) if err != nil { diff --git a/backend/bills/artifact_seam_test.go b/backend/bills/artifact_seam_test.go new file mode 100644 index 00000000..f9577edb --- /dev/null +++ b/backend/bills/artifact_seam_test.go @@ -0,0 +1,229 @@ +package main + +// This is the producer-to-consumer seam test for the bills SQLite artifact +// (EPAC-2304). The artifact schema is an implicit contract between two separate +// binaries: the bills-indexer writer (producer) and the bills serving +// repository (consumer). Per-unit fixtures on either side can drift apart +// without anyone noticing, because the serving repository used to mask missing +// columns with NULL fallbacks. This test crosses the *real* seam: it drives the +// real producer binary to write a real bills.db from a fixture batch, then +// opens that file with the real serving repository and asserts the served shape +// is populated from columns the producer actually writes — no NULL fallbacks. 
+// +// The two adapters live in separate Go modules and are internal to each, so +// (matching the issue's guidance) we do not import one's internals into the +// other. The only thing shared across the seam is the on-disk SQLite file, just +// like in production: the producer runs as a binary and the consumer reads its +// output. +// +// Contract decision recorded here and in +// docs/architecture/bills-artifact-contract-epac2304.md: the bills-indexer's +// bill_versions table records only a publication stage name and a canonical +// viewer URL (html_url) per version. It has no label, title, or chamber column +// and never populates a published date. The served BillVersion therefore +// exposes exactly {id, label, stage, source_url}, where label and stage both +// carry the stage name and source_url is html_url. The previously-served title, +// chamber, and published_on fields were always empty and have been dropped from +// the domain model and OpenAPI. + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + sqliteadapter "epac/bills/internal/adapter/sqlite" + + _ "modernc.org/sqlite" +) + +func TestBillsArtifactSeam(t *testing.T) { + batchJSON := contractFixtureBatchJSON(t) + dbPath := buildContractArtifact(t, batchJSON) + + db, err := sql.Open("sqlite", dbPath) + if err != nil { + t.Fatalf("open produced artifact: %v", err) + } + defer db.Close() + db.SetMaxOpenConns(1) + repo := sqliteadapter.New(db, sqliteadapter.WithNow(func() time.Time { + return time.Date(2026, 6, 14, 12, 0, 0, 0, time.UTC) + })) + ctx := context.Background() + + // ListBills must read the bills + bill_stages tables the producer wrote. 
+ bills, err := repo.ListBills(ctx) + if err != nil { + t.Fatalf("ListBills: %v", err) + } + if len(bills) != 1 || bills[0].ID != "13543613" || bills[0].Number != "C-2" { + t.Fatalf("ListBills = %+v", bills) + } + if len(bills[0].Stages) != 2 || bills[0].Stages[0].Name != "First reading" { + t.Fatalf("ListBills stages = %+v", bills[0].Stages) + } + + // GetBillDepth assembles bill + stages + versions + amendments from the + // produced artifact. + bill, err := repo.GetBillDepth(ctx, "13543613") + if err != nil { + t.Fatalf("GetBillDepth: %v", err) + } + if bill.ID != "13543613" || bill.Title == "" { + t.Fatalf("GetBillDepth bill = %+v", bill) + } + if len(bill.Stages) != 2 { + t.Fatalf("GetBillDepth stages = %+v", bill.Stages) + } + if len(bill.Versions) != 2 { + t.Fatalf("GetBillDepth versions = %+v", bill.Versions) + } + // Promised version columns are populated from real producer columns. + v1 := bill.Versions[0] + if v1.ID != "C-2-v1" || v1.Label != "First Reading" || v1.Stage != "First Reading" { + t.Fatalf("version[0] = %+v", v1) + } + if v1.SourceURL == "" { + t.Fatalf("version[0] source_url empty; expected the producer html_url") + } + // Amendments expose the producer-backed fields (id, source_url). + if len(bill.Amendments) != 1 || bill.Amendments[0].ID != "C-2-a1" || bill.Amendments[0].SourceURL == "" { + t.Fatalf("GetBillDepth amendments = %+v", bill.Amendments) + } + + // GetBillVersionDiff is the bill-diff route's read. Both endpoints of the + // diff and every clause must be populated from the produced artifact. 
+ diff, err := repo.GetBillVersionDiff(ctx, "13543613", "C-2-v1", "C-2-v2") + if err != nil { + t.Fatalf("GetBillVersionDiff: %v", err) + } + if diff == nil { + t.Fatal("GetBillVersionDiff = nil; expected a populated diff") + } + if diff.From.ID != "C-2-v1" || diff.From.Label != "First Reading" || diff.From.Stage != "First Reading" || diff.From.SourceURL == "" { + t.Fatalf("diff.From = %+v", diff.From) + } + if diff.To.ID != "C-2-v2" || diff.To.Label != "Third Reading" || diff.To.Stage != "Third Reading" || diff.To.SourceURL == "" { + t.Fatalf("diff.To = %+v", diff.To) + } + if len(diff.Clauses) != 2 { + t.Fatalf("diff.Clauses = %+v", diff.Clauses) + } + if diff.Clauses[0].ID != "C-2-clause-1" || diff.Clauses[0].ChangeType != "added" || diff.Clauses[0].FromText != "" || diff.Clauses[0].ToText == "" { + t.Fatalf("clause[0] = %+v", diff.Clauses[0]) + } + if diff.Clauses[1].ID != "C-2-clause-2" || diff.Clauses[1].ChangeType != "modified" || + diff.Clauses[1].HansardAnchorURL == nil || *diff.Clauses[1].HansardAnchorURL == "" { + t.Fatalf("clause[1] = %+v", diff.Clauses[1]) + } +} + +// buildContractArtifact builds the bills-indexer binary and runs it in offline +// fixture mode to write a real SQLite artifact from batchJSON, returning the +// path to the produced bills.db. Only the on-disk file crosses the seam. 
+func buildContractArtifact(t *testing.T, batchJSON []byte) string { + t.Helper() + + indexerDir, err := filepath.Abs("../bills-indexer") + if err != nil { + t.Fatalf("resolve bills-indexer dir: %v", err) + } + if _, err := os.Stat(filepath.Join(indexerDir, "main.go")); err != nil { + t.Fatalf("bills-indexer producer not found at %s: %v", indexerDir, err) + } + + tmp := t.TempDir() + fixturePath := filepath.Join(tmp, "batch.json") + if err := os.WriteFile(fixturePath, batchJSON, 0o644); err != nil { + t.Fatalf("write fixture batch: %v", err) + } + dbPath := filepath.Join(tmp, "bills.db") + binPath := filepath.Join(tmp, "bills-indexer-bin") + + ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) + defer cancel() + + build := exec.CommandContext(ctx, "go", "build", "-o", binPath, ".") + build.Dir = indexerDir + var buildOut bytes.Buffer + build.Stdout = &buildOut + build.Stderr = &buildOut + if err := build.Run(); err != nil { + t.Fatalf("build bills-indexer producer: %v\n%s", err, buildOut.String()) + } + + run := exec.CommandContext(ctx, binPath) + run.Env = append(os.Environ(), + "BILLS_FIXTURE_BATCH="+fixturePath, + "DB_PATH="+dbPath, + ) + var runOut bytes.Buffer + run.Stdout = &runOut + run.Stderr = &runOut + if err := run.Run(); err != nil { + t.Fatalf("run bills-indexer producer offline: %v\n%s", err, runOut.String()) + } + if _, err := os.Stat(dbPath); err != nil { + t.Fatalf("producer did not write artifact at %s: %v\n%s", dbPath, err, runOut.String()) + } + return dbPath +} + +// contractFixtureBatchJSON is a representative bills-indexer batch serialized +// with the producer domain's field names. Keeping it here (rather than +// importing the producer's internal domain types) preserves the module +// boundary: the producer decodes it into its own domain.Batch. 
+func contractFixtureBatchJSON(t *testing.T) []byte { + t.Helper() + + batch := map[string]any{ + "Bills": []map[string]any{{ + "ID": "13543613", + "Number": "C-2", + "Title": "An Act respecting certain measures relating to border security", + "ShortTitle": "Strong Borders Act", + "SponsorName": "Hon. Example Minister", + "Status": "At third reading", + "CurrentStage": "Third reading", + "IntroducedOn": "2026-05-01", + "SourceURL": "https://www.parl.ca/legisinfo/en/bill/45-1/c-2", + "BillType": "House Government Bill", + "Parliament": 45, + "Session": 1, + "LegisInfoURL": "https://www.parl.ca/legisinfo/en/bill/45-1/c-2", + "Stages": []map[string]any{ + {"ID": "60029", "Name": "First reading", "Chamber": "House of Commons", "State": "Completed", "CompletedDate": "2026-05-01", "SortOrder": 1, "IsCompleted": true}, + {"ID": "60030", "Name": "Third reading", "Chamber": "House of Commons", "State": "In progress", "SortOrder": 2, "IsCompleted": false}, + }, + "Versions": []map[string]any{ + {"ID": "C-2-v1", "PublicationID": "1001", "Stage": "First Reading", "StageSlug": "first-reading", "HTMLURL": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-2/first-reading", "XMLURL": "https://www.parl.ca/Content/Bills/451/Government/C-2/C-2_1/C-2_E.xml", "Source": "LEGISinfo publication", "SortOrder": 1}, + {"ID": "C-2-v2", "PublicationID": "1002", "Stage": "Third Reading", "StageSlug": "third-reading", "HTMLURL": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-2/third-reading", "XMLURL": "https://www.parl.ca/Content/Bills/451/Government/C-2/C-2_3/C-2_E.xml", "Source": "LEGISinfo publication", "SortOrder": 2}, + }, + "Diffs": []map[string]any{{ + "ID": "C-2-diff-v1-v2", + "FromVersionID": "C-2-v1", + "ToVersionID": "C-2-v2", + "SourceURL": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-2/diff", + "Clauses": []map[string]any{ + {"ID": "C-2-clause-1", "Label": "1", "ChangeType": "added", "FromText": "", "ToText": "Inserted clause 1 text.", "HansardAnchorURL": nil}, + {"ID": 
"C-2-clause-2", "Label": "2", "ChangeType": "modified", "FromText": "Old clause 2 text.", "ToText": "New clause 2 text.", "HansardAnchorURL": "https://www.ourcommons.ca/hansard#clause-2"}, + }, + }}, + "Amendments": []map[string]any{ + {"ID": "C-2-a1", "EventID": "event-1", "StageName": "Consideration in committee", "AmendmentNoteID": "note-1", "AmendmentCount": 3, "SourceURL": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-2/amendments"}, + }, + }}, + } + + data, err := json.Marshal(batch) + if err != nil { + t.Fatalf("marshal fixture batch: %v", err) + } + return data +} diff --git a/backend/bills/internal/adapter/sqlite/repository.go b/backend/bills/internal/adapter/sqlite/repository.go index b39b5e20..26c2a9f7 100644 --- a/backend/bills/internal/adapter/sqlite/repository.go +++ b/backend/bills/internal/adapter/sqlite/repository.go @@ -437,31 +437,20 @@ func (r *Repository) billStages(ctx context.Context, billID string) ([]domain.Bi return stages, nil } +// billVersions reads the version rows the bills-indexer writes. The producer's +// bill_versions table is the locked contract (see the seam test): per version +// it stores a stage name and a canonical viewer URL (html_url), so we read +// those with fixed SQL rather than probing for columns the indexer never +// writes. Label and Stage both carry the stage name; SourceURL is html_url. func (r *Repository) billVersions(ctx context.Context, billID string) ([]domain.BillVersion, error) { - columns, ok, err := r.tableColumns(ctx, "bill_versions") - if err != nil || !ok { + if ok, err := r.tableExists(ctx, "bill_versions"); err != nil || !ok { return []domain.BillVersion{}, err } - billIDColumn := firstColumn(columns, "bill_id", "legisinfo_id") - if billIDColumn == "" { - return []domain.BillVersion{}, nil - } - query := fmt.Sprintf(` - SELECT %s, %s, %s, %s, %s, %s, %s + rows, err := r.db.QueryContext(ctx, ` + SELECT id, stage, html_url FROM bill_versions - WHERE %s = ? 
- ORDER BY %s`, - columnExpr(columns, "id", "version_id"), - columnExpr(columns, "label", "version_label", "name", "stage"), - columnExpr(columns, "title"), - columnExpr(columns, "stage"), - columnExpr(columns, "chamber"), - columnExpr(columns, "published_on", "published_date", "version_date", "date"), - columnExpr(columns, "source_url", "html_url", "url", "text_source_url", "xml_url", "pdf_url"), - billIDColumn, - orderExpr(columns), - ) - rows, err := r.db.QueryContext(ctx, query, billID) + WHERE bill_id = ? + ORDER BY sort_order, rowid`, billID) if err != nil { return nil, fmt.Errorf("query bill versions sqlite artifact: %w", err) } @@ -469,18 +458,10 @@ func (r *Repository) billVersions(ctx context.Context, billID string) ([]domain. versions := make([]domain.BillVersion, 0) for rows.Next() { - var version domain.BillVersion - var id, label, title, stage, chamber, publishedOn, sourceURL sql.NullString - if err := rows.Scan(&id, &label, &title, &stage, &chamber, &publishedOn, &sourceURL); err != nil { - return nil, fmt.Errorf("scan bill versions sqlite artifact: %w", err) + version, err := scanBillVersion(rows) + if err != nil { + return nil, err } - version.ID = stringValue(id) - version.Label = stringValue(label) - version.Title = stringValue(title) - version.Stage = stringValue(stage) - version.Chamber = stringValue(chamber) - version.PublishedOn = stringPtr(publishedOn) - version.SourceURL = stringValue(sourceURL) versions = append(versions, version) } if err := rows.Err(); err != nil { @@ -490,58 +471,46 @@ func (r *Repository) billVersions(ctx context.Context, billID string) ([]domain. 
} func (r *Repository) billVersionByID(ctx context.Context, billID, versionID string) (domain.BillVersion, bool, error) { - columns, ok, err := r.tableColumns(ctx, "bill_versions") - if err != nil || !ok { + if ok, err := r.tableExists(ctx, "bill_versions"); err != nil || !ok { return domain.BillVersion{}, false, err } - billIDColumn := firstColumn(columns, "bill_id", "legisinfo_id") - versionIDColumn := firstColumn(columns, "id", "version_id") - if billIDColumn == "" || versionIDColumn == "" { - return domain.BillVersion{}, false, nil - } - query := fmt.Sprintf(` - SELECT %s, %s, %s, %s, %s, %s, %s + version, err := scanBillVersion(r.db.QueryRowContext(ctx, ` + SELECT id, stage, html_url FROM bill_versions - WHERE %s = ? AND %s = ? - LIMIT 1`, - columnExpr(columns, "id", "version_id"), - columnExpr(columns, "label", "version_label", "name", "stage"), - columnExpr(columns, "title"), - columnExpr(columns, "stage"), - columnExpr(columns, "chamber"), - columnExpr(columns, "published_on", "published_date", "version_date", "date"), - columnExpr(columns, "source_url", "html_url", "url", "text_source_url", "xml_url", "pdf_url"), - billIDColumn, - versionIDColumn, - ) - - var version domain.BillVersion - var id, label, title, stage, chamber, publishedOn, sourceURL sql.NullString - err = r.db.QueryRowContext(ctx, query, billID, versionID).Scan( - &id, - &label, - &title, - &stage, - &chamber, - &publishedOn, - &sourceURL, - ) + WHERE bill_id = ? AND id = ? 
+ LIMIT 1`, billID, versionID)) if errors.Is(err, sql.ErrNoRows) { return domain.BillVersion{}, false, nil } if err != nil { - return domain.BillVersion{}, false, fmt.Errorf("query bill version sqlite artifact: %w", err) - } - version.ID = stringValue(id) - version.Label = stringValue(label) - version.Title = stringValue(title) - version.Stage = stringValue(stage) - version.Chamber = stringValue(chamber) - version.PublishedOn = stringPtr(publishedOn) - version.SourceURL = stringValue(sourceURL) + return domain.BillVersion{}, false, err + } return version, true, nil } +// rowScanner is satisfied by both *sql.Rows and *sql.Row so scanBillVersion can +// serve list and single-row reads from the same fixed projection. +type rowScanner interface { + Scan(dest ...any) error +} + +// scanBillVersion maps the producer's (id, stage, html_url) projection to the +// served BillVersion. The indexer has no separate version label or title, so +// Label and Stage both carry the publication stage name; SourceURL is the +// indexer's html_url. These are the only version fields the producer writes. +func scanBillVersion(scanner rowScanner) (domain.BillVersion, error) { + var id, stage, htmlURL sql.NullString + if err := scanner.Scan(&id, &stage, &htmlURL); err != nil { + return domain.BillVersion{}, fmt.Errorf("scan bill version sqlite artifact: %w", err) + } + return domain.BillVersion{ + ID: stringValue(id), + Label: stringValue(stage), + Stage: stringValue(stage), + SourceURL: stringValue(htmlURL), + }, nil +} + func (r *Repository) billClauseDiffs(ctx context.Context, billID, diffID string) ([]domain.BillClauseDiff, error) { rows, err := r.db.QueryContext(ctx, ` SELECT id, label, change_type, from_text, to_text, hansard_anchor_url @@ -574,33 +543,24 @@ func (r *Repository) billClauseDiffs(ctx context.Context, billID, diffID string) return clauses, nil } +// billAmendments reads the amendment rows the bills-indexer writes. 
The +// producer's bill_amendments table only carries an id and a source_url that map +// onto the served BillAmendment, so we read those with fixed SQL. The remaining +// served fields (number, title, status, stage, sponsor_name, proposed_on, text) +// have no producer column and are intentionally left empty — matching what +// production already returns. Trimming or enriching those served fields is a +// separate contract decision (out of scope for the version contract this seam +// locks); see the "Known limitation" section in +// docs/architecture/bills-artifact-contract-epac2304.md. func (r *Repository) billAmendments(ctx context.Context, billID string) ([]domain.BillAmendment, error) { - columns, ok, err := r.tableColumns(ctx, "bill_amendments") - if err != nil || !ok { + if ok, err := r.tableExists(ctx, "bill_amendments"); err != nil || !ok { return []domain.BillAmendment{}, err } - billIDColumn := firstColumn(columns, "bill_id", "legisinfo_id") - if billIDColumn == "" { - return []domain.BillAmendment{}, nil - } - query := fmt.Sprintf(` - SELECT %s, %s, %s, %s, %s, %s, %s, %s, %s + rows, err := r.db.QueryContext(ctx, ` + SELECT id, source_url FROM bill_amendments - WHERE %s = ? - ORDER BY %s`, - columnExpr(columns, "id", "amendment_id"), - columnExpr(columns, "number", "amendment_number"), - columnExpr(columns, "title"), - columnExpr(columns, "status"), - columnExpr(columns, "stage"), - columnExpr(columns, "sponsor_name", "sponsor"), - columnExpr(columns, "proposed_on", "date"), - columnExpr(columns, "text", "summary"), - columnExpr(columns, "source_url", "url"), - billIDColumn, - orderExpr(columns), - ) - rows, err := r.db.QueryContext(ctx, query, billID) + WHERE bill_id = ? 
+ ORDER BY rowid`, billID) if err != nil { return nil, fmt.Errorf("query bill amendments sqlite artifact: %w", err) } @@ -609,18 +569,11 @@ func (r *Repository) billAmendments(ctx context.Context, billID string) ([]domai amendments := make([]domain.BillAmendment, 0) for rows.Next() { var amendment domain.BillAmendment - var id, number, title, status, stage, sponsorName, proposedOn, text, sourceURL sql.NullString - if err := rows.Scan(&id, &number, &title, &status, &stage, &sponsorName, &proposedOn, &text, &sourceURL); err != nil { + var id, sourceURL sql.NullString + if err := rows.Scan(&id, &sourceURL); err != nil { return nil, fmt.Errorf("scan bill amendments sqlite artifact: %w", err) } amendment.ID = stringValue(id) - amendment.Number = stringValue(number) - amendment.Title = stringValue(title) - amendment.Status = stringValue(status) - amendment.Stage = stringValue(stage) - amendment.SponsorName = stringValue(sponsorName) - amendment.ProposedOn = stringPtr(proposedOn) - amendment.Text = stringValue(text) amendment.SourceURL = stringValue(sourceURL) amendments = append(amendments, amendment) } @@ -642,58 +595,6 @@ func (r *Repository) tableExists(ctx context.Context, table string) (bool, error return true, nil } -func (r *Repository) tableColumns(ctx context.Context, table string) (map[string]bool, bool, error) { - if ok, err := r.tableExists(ctx, table); err != nil || !ok { - return nil, false, err - } - rows, err := r.db.QueryContext(ctx, "PRAGMA table_info("+table+")") - if err != nil { - return nil, false, fmt.Errorf("read sqlite table info %s: %w", table, err) - } - defer rows.Close() - - columns := map[string]bool{} - for rows.Next() { - var cid int - var name, columnType string - var notNull, pk int - var defaultValue any - if err := rows.Scan(&cid, &name, &columnType, &notNull, &defaultValue, &pk); err != nil { - return nil, false, fmt.Errorf("scan sqlite table info %s: %w", table, err) - } - columns[name] = true - } - if err := rows.Err(); err != nil { - 
return nil, false, fmt.Errorf("iterate sqlite table info %s: %w", table, err) - } - return columns, true, nil -} - -func columnExpr(columns map[string]bool, candidates ...string) string { - for _, candidate := range candidates { - if columns[candidate] { - return candidate - } - } - return "NULL" -} - -func firstColumn(columns map[string]bool, candidates ...string) string { - for _, candidate := range candidates { - if columns[candidate] { - return candidate - } - } - return "" -} - -func orderExpr(columns map[string]bool) string { - if columns["sort_order"] { - return "sort_order, rowid" - } - return "rowid" -} - func stringValue(value sql.NullString) string { if !value.Valid { return "" diff --git a/backend/bills/internal/adapter/sqlite/repository_test.go b/backend/bills/internal/adapter/sqlite/repository_test.go index bf4fbd2c..63378ddb 100644 --- a/backend/bills/internal/adapter/sqlite/repository_test.go +++ b/backend/bills/internal/adapter/sqlite/repository_test.go @@ -26,9 +26,8 @@ func TestRepositoryGetBillVersionDiffMapsCurrentArtifactSchema(t *testing.T) { if diff.From.ID != "v1" || diff.From.Label != "First Reading" || diff.From.Stage != "First Reading" { t.Fatalf("from version = %+v", diff.From) } - if diff.From.PublishedOn == nil || *diff.From.PublishedOn != "2026-06-01" { - t.Fatalf("from published_on = %+v", diff.From.PublishedOn) - } + // Label mirrors the stage name (the producer has no separate label column), + // and SourceURL is read from the producer's html_url column. if diff.From.SourceURL != "https://www.parl.ca/v1" { t.Fatalf("from source_url = %q", diff.From.SourceURL) } diff --git a/backend/bills/internal/domain/domain.go b/backend/bills/internal/domain/domain.go index 88eb8c07..8908b8d3 100644 --- a/backend/bills/internal/domain/domain.go +++ b/backend/bills/internal/domain/domain.go @@ -18,14 +18,20 @@ type BillStage struct { IsCompleted bool `json:"is_completed"` } +// BillVersion is the served shape of a bill publication. 
Its fields are +// single-sourced from the bills-indexer SQLite artifact (the producer): the +// indexer's bill_versions table only carries a publication stage name and a +// canonical viewer URL per version, so the served contract exposes exactly +// those. Label and Stage both carry the LEGISinfo publication-type name (the +// indexer has no separate label column); SourceURL is the indexer's html_url. +// The artifact schema is locked by the producer-to-consumer seam test +// (TestBillsArtifactSeam in backend/bills). See +// docs/architecture/bills-artifact-contract-epac2304.md. type BillVersion struct { - ID string `json:"id,omitempty"` - Label string `json:"label,omitempty"` - Title string `json:"title,omitempty"` - Stage string `json:"stage,omitempty"` - Chamber string `json:"chamber,omitempty"` - PublishedOn *string `json:"published_on,omitempty"` - SourceURL string `json:"source_url,omitempty"` + ID string `json:"id,omitempty"` + Label string `json:"label,omitempty"` + Stage string `json:"stage,omitempty"` + SourceURL string `json:"source_url,omitempty"` } type BillAmendment struct { diff --git a/backend/bills/main_test.go b/backend/bills/main_test.go index d3221179..4443b791 100644 --- a/backend/bills/main_test.go +++ b/backend/bills/main_test.go @@ -75,7 +75,8 @@ func TestHandleRequestGetsBillDepth(t *testing.T) { if len(body.Bill.Versions) != 1 || body.Bill.Versions[0].Label != "First reading" { t.Fatalf("versions = %+v", body.Bill.Versions) } - if len(body.Bill.Amendments) != 1 || body.Bill.Amendments[0].Number != "NDP-1" { + if len(body.Bill.Amendments) != 1 || body.Bill.Amendments[0].ID != "C-2260-a1" || + body.Bill.Amendments[0].SourceURL != "https://www.parl.ca/amendment" { t.Fatalf("amendments = %+v", body.Bill.Amendments) } } @@ -382,16 +383,15 @@ func writeBillSQLiteUnitFixture(t *testing.T, dir string, bills []Bill) { )`); err != nil { t.Fatalf("create stages table: %v", err) } + // Mirror the columns the bills-indexer actually writes (see the 
producer + // schema and the artifact seam test): per version, a stage name and an + // html_url. The serving repo reads only these for the version contract. if _, err := db.Exec(`CREATE TABLE bill_versions ( bill_id TEXT NOT NULL, id TEXT NOT NULL, - label TEXT NOT NULL, - title TEXT NOT NULL DEFAULT '', stage TEXT NOT NULL DEFAULT '', - chamber TEXT NOT NULL DEFAULT '', - published_on TEXT, - source_url TEXT NOT NULL DEFAULT '', - sort_order INTEGER NOT NULL + html_url TEXT NOT NULL DEFAULT '', + sort_order INTEGER NOT NULL DEFAULT 0 )`); err != nil { t.Fatalf("create bill versions table: %v", err) } @@ -419,18 +419,17 @@ func writeBillSQLiteUnitFixture(t *testing.T, dir string, bills []Bill) { )`); err != nil { t.Fatalf("create bill clause diffs table: %v", err) } + // Mirror the bills-indexer's bill_amendments columns. The serving repo reads + // only id and source_url onto the served amendment today (the other served + // fields have no producer column); see billAmendments. if _, err := db.Exec(`CREATE TABLE bill_amendments ( bill_id TEXT NOT NULL, id TEXT NOT NULL, - number TEXT NOT NULL, - title TEXT NOT NULL DEFAULT '', - status TEXT NOT NULL DEFAULT '', - stage TEXT NOT NULL DEFAULT '', - sponsor_name TEXT NOT NULL DEFAULT '', - proposed_on TEXT, - text TEXT NOT NULL DEFAULT '', - source_url TEXT NOT NULL DEFAULT '', - sort_order INTEGER NOT NULL + event_id TEXT NOT NULL DEFAULT '', + stage_name TEXT NOT NULL DEFAULT '', + amendment_note_id TEXT NOT NULL DEFAULT '', + amendment_count INTEGER NOT NULL DEFAULT 0, + source_url TEXT NOT NULL DEFAULT '' )`); err != nil { t.Fatalf("create bill amendments table: %v", err) } @@ -483,20 +482,20 @@ func writeBillSQLiteUnitFixture(t *testing.T, dir string, bills []Bill) { t.Fatalf("insert stage fixture: %v", err) } if _, err := db.Exec(` - INSERT INTO bill_versions (bill_id, id, label, title, stage, chamber, published_on, source_url, sort_order) - VALUES ('C-2260', 'C-2260-v1', 'First reading', 'Depth Act first 
reading', 'House First Reading', 'House', '2026-06-01', 'https://www.parl.ca/version', 1)`); err != nil { + INSERT INTO bill_versions (bill_id, id, stage, html_url, sort_order) + VALUES ('C-2260', 'C-2260-v1', 'First reading', 'https://www.parl.ca/version', 1)`); err != nil { t.Fatalf("insert version fixture: %v", err) } if _, err := db.Exec(` - INSERT INTO bill_versions (bill_id, id, label, title, stage, chamber, published_on, source_url, sort_order) - VALUES ('C-1', 'C-1-v1', 'First reading', 'One Version Act first reading', 'House First Reading', 'House', '2026-06-01', 'https://www.parl.ca/c1/v1', 1)`); err != nil { + INSERT INTO bill_versions (bill_id, id, stage, html_url, sort_order) + VALUES ('C-1', 'C-1-v1', 'First reading', 'https://www.parl.ca/c1/v1', 1)`); err != nil { t.Fatalf("insert one-version fixture: %v", err) } if _, err := db.Exec(` - INSERT INTO bill_versions (bill_id, id, label, title, stage, chamber, published_on, source_url, sort_order) + INSERT INTO bill_versions (bill_id, id, stage, html_url, sort_order) VALUES - ('C-2287', 'C-2287-v1', 'First reading', 'Diff Act first reading', 'House First Reading', 'House', '2026-06-01', 'https://www.parl.ca/c2287/v1', 1), - ('C-2287', 'C-2287-v2', 'Third reading', 'Diff Act third reading', 'House Third Reading', 'House', '2026-06-10', 'https://www.parl.ca/c2287/v2', 2)`); err != nil { + ('C-2287', 'C-2287-v1', 'First reading', 'https://www.parl.ca/c2287/v1', 1), + ('C-2287', 'C-2287-v2', 'Third reading', 'https://www.parl.ca/c2287/v2', 2)`); err != nil { t.Fatalf("insert diff versions fixture: %v", err) } if _, err := db.Exec(` @@ -513,8 +512,8 @@ func writeBillSQLiteUnitFixture(t *testing.T, dir string, bills []Bill) { t.Fatalf("insert clause diff fixture: %v", err) } if _, err := db.Exec(` - INSERT INTO bill_amendments (bill_id, id, number, title, status, stage, sponsor_name, proposed_on, text, source_url, sort_order) - VALUES ('C-2260', 'C-2260-a1', 'NDP-1', 'Add review clause', 'adopted', 
'Committee', 'Jane Example', '2026-06-02', 'Clause 2 is amended...', 'https://www.parl.ca/amendment', 1)`); err != nil { + INSERT INTO bill_amendments (bill_id, id, event_id, stage_name, amendment_note_id, amendment_count, source_url) + VALUES ('C-2260', 'C-2260-a1', 'event-1', 'Consideration in committee', 'note-1', 3, 'https://www.parl.ca/amendment')`); err != nil { t.Fatalf("insert amendment fixture: %v", err) } if _, err := db.Exec(` diff --git a/backend/openapi/openapi.json b/backend/openapi/openapi.json index bfbe5b9c..2136549f 100644 --- a/backend/openapi/openapi.json +++ b/backend/openapi/openapi.json @@ -815,7 +815,7 @@ { "id": "C-1-v1", "label": "First reading", - "published_on": "2026-04-27", + "stage": "First reading", "source_url": "https://www.parl.ca/version" } ], @@ -942,17 +942,15 @@ "value": { "from": { "id": "C-8-v1", - "label": "First reading", + "label": "First Reading", "stage": "First Reading", - "chamber": "House of Commons", - "published_on": "2026-04-27" + "source_url": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-8/first-reading" }, "to": { "id": "C-8-v3", - "label": "As passed by the House", + "label": "Third Reading", "stage": "Third Reading", - "chamber": "House of Commons", - "published_on": "2026-06-04" + "source_url": "https://www.parl.ca/DocumentViewer/en/45-1/bill/C-8/third-reading" }, "clauses": [ { @@ -2708,13 +2706,11 @@ }, "BillVersion": { "type": "object", + "description": "A bill publication as served from the bills SQLite artifact. The bills-indexer only records a publication stage name and a canonical viewer URL per version, so label and stage both carry the LEGISinfo publication-type name and source_url is that viewer URL. 
The producer-to-consumer seam test locks this field set.", "properties": { "id": { "type": "string" }, "label": { "type": "string" }, - "title": { "type": "string" }, "stage": { "type": "string" }, - "chamber": { "type": "string" }, - "published_on": { "type": "string", "format": "date" }, "source_url": { "type": "string", "format": "uri" } } }, diff --git a/docs/architecture/bills-artifact-contract-epac2304.md b/docs/architecture/bills-artifact-contract-epac2304.md new file mode 100644 index 00000000..905d317e --- /dev/null +++ b/docs/architecture/bills-artifact-contract-epac2304.md @@ -0,0 +1,46 @@ +# Bills SQLite Artifact Contract (EPAC-2304) + +**Status:** Accepted for v1 +**Last updated:** 2026-06-14 +**Decision owner:** Riddim Software + +## Decision + +The bills SQLite artifact is the contract between two separate Go binaries: + +- **Producer:** the bills-indexer writer, `backend/bills-indexer/internal/adapter/sqlite/writer.go`. It creates the schema and writes the rows. +- **Consumer:** the bills serving repository, `backend/bills/internal/adapter/sqlite/repository.go`. It opens the artifact read-only and serves `/api/v1/bills` routes from it. + +The served `BillVersion` is single-sourced from what the producer actually writes. For each version the indexer records a publication **stage** name and a canonical viewer **URL** (`html_url`) — nothing else. The served contract is therefore exactly: + +| Served field | Source column | Notes | +|---|---|---| +| `id` | `bill_versions.id` | | +| `label` | `bill_versions.stage` | The indexer has no separate label column; the publication-type name *is* the label. | +| `stage` | `bill_versions.stage` | Same value as `label`. | +| `source_url` | `bill_versions.html_url` | The producer's canonical viewer URL. | + +The previously-served `title`, `chamber`, and `published_on` fields were **dropped** from the domain model (`backend/bills/internal/domain/domain.go`) and from `backend/openapi/openapi.json`. 
The indexer never wrote those columns, so they were always empty in responses (and silently omitted via `omitempty`). Removing them does not change any byte iOS receives at runtime — iOS already decodes every version field as optional and coerces empty to nil. + +`label` and `stage` deliberately carry the same value until the producer gains a distinct label datum. Keeping `label` (rather than dropping it as redundant) preserves the field iOS currently renders. + +## Why not populate the dropped fields instead? + +The alternative was to make the indexer persist `chamber`, a real `published_on`, and an explicit `label`/`title`. The indexer's per-publication source data (LEGISinfo `publicationJSON`) carries only the publication type name and id; there is no authoritative chamber, title, or publication date to map from. Inventing those values would violate the project rule that civic content must trace to an authoritative source. Trimming the contract to what the producer can faithfully supply is the honest way to single-source it. + +## How the contract is locked + +`TestBillsArtifactSeam` (`backend/bills/artifact_seam_test.go`) is a build-time seam test: + +1. It serializes a representative batch and drives the **real** producer binary in offline fixture mode (`BILLS_FIXTURE_BATCH`, see `backend/bills-indexer/main.go`) to write a real `bills.db`. +2. It opens that file with the **real** serving repository and asserts `ListBills`, `GetBillDepth`, and `GetBillVersionDiff` return the promised columns populated — no reliance on NULL fallbacks. + +The two adapters are `internal` to separate Go modules, so the test imports neither into the other; the only thing shared across the seam is the on-disk SQLite file, exactly as in production. The test runs in CI via the `backend-tests` job (`Run bills tests` step in `.github/workflows/pr-build.yml`), so schema drift now fails at build time instead of during staging smoke tests or in production. 
+ +Because the serving repository now reads fixed column projections, the dynamic column-name fallbacks (`columnExpr`/`firstColumn`/`tableColumns`/`orderExpr`) that previously masked drift have been removed. + +## Known limitation — bill amendments (recommended follow-up) + +The served `BillAmendment` has the same kind of latent gap, and it is **out of scope** for this version-contract work. The indexer's `bill_amendments` table records `event_id`, `stage_name`, `amendment_note_id`, `amendment_count`, and `source_url`; the served `BillAmendment` declares `number`, `title`, `status`, `stage`, `sponsor_name`, `proposed_on`, and `text`. Only `id` and `source_url` overlap, so the serving repository now reads exactly those two with fixed SQL (`billAmendments`) — identical to what production already returned, but without dead fallbacks. + +The served amendment field set still over-promises in `openapi.json`. Reconciling it (either trimming those fields to what the indexer records, or enriching the indexer beyond `amendment_count`/`stage_name` so it can populate them) is a separate contract decision that also touches the iOS amendments panel. It should be filed as a follow-up against the bills serving and bills-indexer adapters; this document and the `billAmendments` comment are the breadcrumb.