From 7d2f723084d8161adee63a47930dcc5659a33e04 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Wed, 17 Dec 2025 09:39:54 +0000 Subject: [PATCH 01/15] chore: lint fix - mostly whitespace only --- .../species/trade_name_prefix_matcher_spec.rb | 14 ++++++++++---- .../taxon_concept_prefix_matcher_spec.rb | 19 ++++++++++++++++--- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/spec/services/species/trade_name_prefix_matcher_spec.rb b/spec/services/species/trade_name_prefix_matcher_spec.rb index e41ccd1c2..ccbdefe2c 100644 --- a/spec/services/species/trade_name_prefix_matcher_spec.rb +++ b/spec/services/species/trade_name_prefix_matcher_spec.rb @@ -4,10 +4,12 @@ @accepted_name = create_cites_eu_genus( taxon_name: create(:taxon_name, scientific_name: 'Pavona') ) + @trade_name = create_cites_eu_species( taxon_name: create(:taxon_name, scientific_name: 'Pavona minor'), name_status: 'T' ) + @status_N_species = create_cites_eu_species( taxon_name: create(:taxon_name, scientific_name: 'Paradisaea'), parent: create_cites_eu_genus( @@ -15,14 +17,18 @@ ), name_status: 'N' ) + create( :taxon_relationship, taxon_concept: @accepted_name, other_taxon_concept: @trade_name, taxon_relationship_type: trade_name_relationship_type ) + create_cites_I_addition(taxon_concept: @accepted_name) + SapiModule::StoredProcedures.rebuild_cites_taxonomy_and_listings + @accepted_name_ac = MAutoCompleteTaxonConcept.find(@accepted_name.id) @trade_name_ac = MAutoCompleteTaxonConcept.find(@trade_name.id) @status_N_species_ac = MAutoCompleteTaxonConcept.find(@status_N_species.id) @@ -42,9 +48,9 @@ end specify { expect(subject.results).to include(@status_N_species_ac) } - end + end - context 'when trade internal visibility' do + context 'when trade internal visibility' do subject do Species::TaxonConceptPrefixMatcher.new( { @@ -56,9 +62,9 @@ end specify { expect(subject.results).to include(@status_N_species_ac) } - end + end - context 'when speciesplus visibility' do + context 'when speciesplus visibility' do subject do Species::TaxonConceptPrefixMatcher.new( { diff --git a/spec/services/taxon_concept_prefix_matcher_spec.rb b/spec/services/taxon_concept_prefix_matcher_spec.rb index 0d0762b45..2ad678631 100644 --- a/spec/services/taxon_concept_prefix_matcher_spec.rb +++ b/spec/services/taxon_concept_prefix_matcher_spec.rb @@ -8,12 +8,14 @@ taxon_name: create(:taxon_name, scientific_name: 'Aaa') ) end + let!(:taxon_concept2) do create_cites_eu_family( taxon_name: create(:taxon_name, scientific_name: 'Aac'), parent: taxon_concept1 ) end + let!(:taxon_concept3) do create_cites_eu_subfamily( taxon_name: create(:taxon_name, scientific_name: 'Aab'), @@ -32,23 +34,28 @@ parent: taxon_concept3 ) end + let!(:hybrid) do - tmp = create_cites_eu_genus( + hybrid_genus = create_cites_eu_genus( taxon_name: create(:taxon_name, scientific_name: 'Abc'), name_status: 'H' ) + create( :taxon_relationship, taxon_concept: taxon_concept4, - other_taxon_concept: tmp, + other_taxon_concept: hybrid_genus, taxon_relationship_type: hybrid_relationship_type ) - tmp + + hybrid_genus end + context 'when name status not specified' do let(:matcher_params) do SearchParams.new(taxonomy: { id: taxonomy.id }, scientific_name: 'Ab') end + let(:matcher) { TaxonConceptPrefixMatcher.new matcher_params } specify { expect(matcher.taxon_concepts).to include(taxon_concept4) } specify { expect(matcher.taxon_concepts).not_to include(hybrid) } @@ -58,7 +65,9 @@ let(:matcher_params) do SearchParams.new(taxonomy: { id: taxonomy.id }, scientific_name: 'Ab', name_status: 'H') end + let(:matcher) { TaxonConceptPrefixMatcher.new matcher_params } + specify { expect(matcher.taxon_concepts).not_to include(taxon_concept4) } specify { expect(matcher.taxon_concepts).to include(hybrid) } end @@ -71,6 +80,7 @@ scientific_name: 'A' ) end + let(:parent_matcher) do TaxonConceptPrefixMatcher.new parent_matcher_params end @@ -88,6 +98,7 @@ scientific_name: 'AAA' ) end + let(:ancestor_matcher) do TaxonConceptPrefixMatcher.new ancestor_matcher_params end @@ -105,6 +116,7 @@ scientific_name: 'AAA' ) end + let(:self_and_ancestor_matcher) do TaxonConceptPrefixMatcher.new self_and_ancestor_matcher_params end @@ -140,6 +152,7 @@ scientific_name: 'A' ) end + let(:descendant_matcher) do TaxonConceptPrefixMatcher.new descendant_matcher_params end From be1cbb27197caefa1aa63c1589df8035ea900999 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Wed, 13 Aug 2025 17:22:20 +0100 Subject: [PATCH 02/15] feat: write a view to make 003_rebuild_designation_all_listing_changes_mview redundant --- ..._designation_all_listing_changes_mview.sql | 138 +++++++++++++++++- 1 file changed, 132 insertions(+), 6 deletions(-) diff --git a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql index d9d8ec975..7d53cbcaf 100644 --- a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql +++ b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql @@ -1,9 +1,135 @@ - DROP FUNCTION IF EXISTS rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations - ); - CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations, events_ids INT[] - ) RETURNS void +CREATE OR REPLACE VIEW all_listing_changes_mview AS + -- affected_taxon_concept -- is a taxon concept that is affected by this listing change, + -- even though it might not have an explicit connection to it + -- (i.e. it''s an ancestor''s listing change) + WITH designations_and_intervals AS ( + SELECT + designations.id designation_id, + designations.name designation_name, + designations.taxonomy_id taxonomy_id, + intervals.start_date interval_start_date, + intervals.end_date interval_end_date, + intervals.events_ids interval_events_ids + FROM designations + LEFT JOIN eu_regulations_applicability_view intervals + ON designations.name = 'EU' + ), listing_changes_with_exceptions AS ( + -- the purpose of this CTE is to aggregate excluded taxon concept ids + SELECT + listing_changes.id, + change_types.designation_id, + change_types.name AS change_type_name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current, + ARRAY_AGG_NOTNULL(taxonomic_exceptions.taxon_concept_id) AS excluded_taxon_concept_ids + FROM listing_changes + LEFT JOIN listing_changes taxonomic_exceptions + ON listing_changes.id = taxonomic_exceptions.parent_id + AND listing_changes.taxon_concept_id != taxonomic_exceptions.taxon_concept_id + JOIN change_types ON change_types.id = listing_changes.change_type_id + JOIN designations_and_intervals + ON designations_and_intervals.designation_id = change_types.designation_id + AND ( + designations_and_intervals IS NULL + OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL + OR listing_changes.event_id = ANY(designations_and_intervals.interval_events_ids) + ) + GROUP BY + listing_changes.id, + change_types.designation_id, + change_types.name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current + ), aggregate_lc AS ( + -- the purpose of this CTE is to aggregate listed and excluded populations + SELECT lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + party_distribution.geo_entity_id AS party_id, + ARRAY_AGG_NOTNULL(listing_distributions.geo_entity_id) AS listed_geo_entities_ids, + ARRAY_AGG_NOTNULL(excluded_distributions.geo_entity_id) AS excluded_geo_entities_ids + FROM listing_changes_with_exceptions lc + LEFT JOIN listing_distributions + ON lc.id = listing_distributions.listing_change_id + AND NOT listing_distributions.is_party + LEFT JOIN listing_distributions party_distribution + ON lc.id = party_distribution.listing_change_id + AND party_distribution.is_party + LEFT JOIN listing_changes population_exceptions + ON lc.id = population_exceptions.parent_id + AND lc.taxon_concept_id = population_exceptions.taxon_concept_id + LEFT JOIN listing_distributions excluded_distributions + ON population_exceptions.id = excluded_distributions.listing_change_id + AND NOT excluded_distributions.is_party + GROUP BY + lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + party_distribution.geo_entity_id, + lc.excluded_taxon_concept_ids +) +SELECT + lc.*, + tc.taxon_concept_id AS affected_taxon_concept_id, + COALESCE(itc.tree_distance, tc.tree_distance) tree_distance, + -- the following ROW_NUMBER call will assign chronological order to listing changes + -- in scope of the affected taxon concept and a particular designation + ROW_NUMBER() OVER ( + PARTITION BY tc.taxon_concept_id, designation_id + ORDER BY effective_at, + CASE + WHEN change_type_name = 'DELETION' THEN 0 + WHEN change_type_name = 'RESERVATION_WITHDRAWAL' THEN 1 + WHEN change_type_name = 'ADDITION' THEN 2 + WHEN change_type_name = 'RESERVATION' THEN 3 + WHEN change_type_name = 'EXCEPTION' THEN 4 + END, + tc.tree_distance + -- ??? OR would it be better to + -- COALESCE(itc.tree_distance, tc.tree_distance) tree_distance + )::INT AS timeline_position +FROM aggregate_lc lc +JOIN taxon_concepts_and_ancestors_mview tc + ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id +LEFT JOIN taxon_concepts_and_ancestors_mview itc + ON lc.inclusion_taxon_concept_id = itc.ancestor_taxon_concept_id + AND lc.taxon_concept_id = itc.taxon_concept_id +; + + +DROP FUNCTION IF EXISTS rebuild_designation_all_listing_changes_mview( + taxonomy taxonomies, designation designations +); + + +CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( + taxonomy taxonomies, designation designations, events_ids INT[] +) RETURNS void LANGUAGE plpgsql AS $$ DECLARE From a2f4376960bc2347368d5357b190303fd43ca4cb Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Thu, 14 Aug 2025 15:15:31 +0100 Subject: [PATCH 03/15] badwop --- ...eu_applicable_listing_changes_for_node.sql | 193 ++++++++++++++++++ 1 file changed, 193 insertions(+) diff --git a/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql b/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql index 6263cd92a..ddf5e3983 100644 --- a/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql +++ b/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql @@ -1,3 +1,196 @@ +-- Todo: Handle `$1` + +WITH RECURSIVE listing_changes_timeline AS ( + SELECT lc.id, + designation_id, + affected_taxon_concept_id AS original_taxon_concept_id, + taxon_concept_id AS current_taxon_concept_id, + CASE -- context + WHEN inclusion_taxon_concept_id IS NULL + THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) + ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) + END AS context, + inclusion_taxon_concept_id, + species_listing_id, + change_type_id, + event_id, + effective_at, + tree_distance AS context_tree_distance, + timeline_position, + CASE -- is_applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND NOT listed_geo_entities_ids && taxon_concepts_mview.countries_ids_ary + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND excluded_geo_entities_ids @> taxon_concepts_mview.countries_ids_ary + ) + THEN FALSE + WHEN ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND excluded_taxon_concept_ids && ARRAY[ + affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN FALSE + ELSE TRUE + END AS is_applicable + FROM all_taxon_listing_changes_view lc + JOIN cites_eu_tmp_taxon_concepts_mview taxon_concepts_mview + ON lc.affected_taxon_concept_id = taxon_concepts_mview.id + WHERE timeline_position = 1 + -- AND lc.affected_taxon_concept_id = $1 + + UNION + + SELECT hi.id, + hi.designation_id, + listing_changes_timeline.original_taxon_concept_id, + hi.taxon_concept_id, + CASE -- context + WHEN hi.inclusion_taxon_concept_id IS NOT NULL + AND ( + AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + OR listing_changes_timeline.context = ''::HSTORE + ) + THEN HSTORE(hi.species_listing_id::TEXT, hi.inclusion_taxon_concept_id::TEXT) + WHEN change_types.name = 'DELETION' + AND hi.taxon_concept_id = hi.affected_taxon_concept_id + THEN listing_changes_timeline.context - ARRAY[hi.species_listing_id::TEXT] + WHEN change_types.name = 'DELETION' + THEN listing_changes_timeline.context - HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- if it is a new listing at closer level that replaces an older listing, wipe out the context + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + AND hi.effective_at > listing_changes_timeline.effective_at + AND change_types.name = 'ADDITION' + THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- if it is a same day split listing we don''t want to wipe the other part of the split from the context + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + AND change_types.name = 'ADDITION' + THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance + AND hi.affected_taxon_concept_id = hi.taxon_concept_id + AND change_types.name = 'ADDITION' + THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- changing this to <= breaks Ursus arctos isabellinus + WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance + AND change_types.name = 'ADDITION' + THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + ELSE listing_changes_timeline.context + END AS context, + hi.inclusion_taxon_concept_id, + hi.species_listing_id, + hi.change_type_id, + hi.event_id, + hi.effective_at, + CASE -- context_tree_distance + WHEN ( + hi.inclusion_taxon_concept_id IS NOT NULL + AND AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + ) OR hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN hi.tree_distance + ELSE listing_changes_timeline.context_tree_distance + END AS context_tree_distance, + hi.timeline_position, + CASE -- is applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(hi.listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND NOT hi.listed_geo_entities_ids && taxon_concepts_mview.countries_ids_ary + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(hi.excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND hi.excluded_geo_entities_ids @> taxon_concepts_mview.countries_ids_ary + ) + THEN FALSE + WHEN ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND hi.excluded_taxon_concept_ids && ARRAY[ + hi.affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN FALSE + WHEN listing_changes_timeline.context -> hi.species_listing_id::TEXT = hi.taxon_concept_id::TEXT + OR hi.taxon_concept_id = listing_changes_timeline.original_taxon_concept_id + -- this line to make Moschus leucogaster happy + OR AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + THEN TRUE + WHEN listing_changes_timeline.context = ''::HSTORE --this would be the case when deleted + AND ( + ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + AND NOT hi.excluded_taxon_concept_ids && ARRAY[hi.affected_taxon_concept_id] + OR ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NULL + ) + AND hi.inclusion_taxon_concept_id IS NULL + AND hi.change_type_name = 'ADDITION' + THEN TRUE -- allows for re-listing + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN TRUE + ELSE FALSE + END AS is_applicable + FROM all_taxon_listing_changes_view hi + JOIN listing_changes_timeline + ON hi.designation_id = listing_changes_timeline.designation_id + AND listing_changes_timeline.original_taxon_concept_id = hi.affected_taxon_concept_id + AND listing_changes_timeline.timeline_position + 1 = hi.timeline_position + JOIN change_types + ON hi.change_type_id = change_types.id + JOIN cites_eu_tmp_taxon_concepts_mview taxon_concepts_mview + ON hi.affected_taxon_concept_id = taxon_concepts_mview.id +) +SELECT listing_changes_timeline.id +FROM listing_changes_timeline +WHERE is_applicable +ORDER BY timeline_position +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + DROP FUNCTION IF EXISTS cites_eu_applicable_listing_changes_for_node(designation_name TEXT, node_id INT); CREATE OR REPLACE FUNCTION cites_eu_applicable_listing_changes_for_node(all_listing_changes_mview TEXT, node_id INT) RETURNS SETOF INT From 7952233baf873ac7f77f6b14375272d9670360a7 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Thu, 18 Dec 2025 11:48:21 +0000 Subject: [PATCH 04/15] morewop --- ..._designation_all_listing_changes_mview.sql | 164 +++++++++--------- 1 file changed, 84 insertions(+), 80 deletions(-) diff --git a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql index 7d53cbcaf..1d8633a5f 100644 --- a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql +++ b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql @@ -1,7 +1,7 @@ -CREATE OR REPLACE VIEW all_listing_changes_mview AS - -- affected_taxon_concept -- is a taxon concept that is affected by this listing change, - -- even though it might not have an explicit connection to it - -- (i.e. it''s an ancestor''s listing change) +CREATE OR REPLACE VIEW all_listing_changes_view AS + -- affected_taxon_concept is a taxon concept that is affected by this listing + -- change, even though it might not have an explicit connection to it + -- (i.e. it is an ancestor's listing change). WITH designations_and_intervals AS ( SELECT designations.id designation_id, @@ -96,6 +96,8 @@ CREATE OR REPLACE VIEW all_listing_changes_mview AS SELECT lc.*, tc.taxon_concept_id AS affected_taxon_concept_id, + -- Make the tree distance reflect distance from inclusion + -- TODO TEST Rhinopittecus roxellana COALESCE(itc.tree_distance, tc.tree_distance) tree_distance, -- the following ROW_NUMBER call will assign chronological order to listing changes -- in scope of the affected taxon concept and a particular designation @@ -121,33 +123,11 @@ LEFT JOIN taxon_concepts_and_ancestors_mview itc AND lc.taxon_concept_id = itc.taxon_concept_id ; - DROP FUNCTION IF EXISTS rebuild_designation_all_listing_changes_mview( taxonomy taxonomies, designation designations ); - -CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations, events_ids INT[] -) RETURNS void - LANGUAGE plpgsql - AS $$ - DECLARE - all_lc_table_name TEXT; - tmp_lc_table_name TEXT; - tc_table_name TEXT; - sql TEXT; - BEGIN - SELECT listing_changes_mview_name('all', designation.name, events_ids) - INTO all_lc_table_name; - SELECT listing_changes_mview_name('tmp', designation.name, events_ids) - INTO tmp_lc_table_name; - - SELECT LOWER(taxonomy.name) || '_taxon_concepts_and_ancestors_view' INTO tc_table_name; - - EXECUTE 'DROP TABLE IF EXISTS ' || tmp_lc_table_name || ' CASCADE'; - - sql := 'CREATE TEMP TABLE ' || tmp_lc_table_name || ' AS +CREATE OR REPLACE VIEW all_listing_changes_view AS -- affected_taxon_concept -- is a taxon concept that is affected by this listing change, -- even though it might not have an explicit connection to it -- (i.e. it''s an ancestor''s listing change) @@ -225,63 +205,87 @@ CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( lc.effective_at, lc.is_current, party_distribution.geo_entity_id, - lc.excluded_taxon_concept_ids'; - - EXECUTE sql; - - EXECUTE 'CREATE INDEX ON ' || tmp_lc_table_name || ' (taxon_concept_id)'; - -- for the current listing calculation - EXECUTE 'CREATE INDEX ON ' || tmp_lc_table_name || ' (taxon_concept_id, is_current, change_type_name, inclusion_taxon_concept_id)'; - - EXECUTE 'DROP TABLE IF EXISTS ' || all_lc_table_name || ' CASCADE'; - - sql := 'CREATE TEMP TABLE ' || all_lc_table_name || ' AS - SELECT - lc.*, - tc.taxon_concept_id AS affected_taxon_concept_id, - tc.tree_distance, - -- the following ROW_NUMBER call will assign chronological order to listing changes - -- in scope of the affected taxon concept and a particular designation - ROW_NUMBER() OVER ( - PARTITION BY tc.taxon_concept_id, designation_id - ORDER BY effective_at, - CASE - WHEN change_type_name = ''DELETION'' THEN 0 - WHEN change_type_name = ''RESERVATION_WITHDRAWAL'' THEN 1 - WHEN change_type_name = ''ADDITION'' THEN 2 - WHEN change_type_name = ''RESERVATION'' THEN 3 - WHEN change_type_name = ''EXCEPTION'' THEN 4 - END, - tree_distance - )::INT AS timeline_position - FROM ' || tmp_lc_table_name || ' lc - JOIN ' || tc_table_name || ' tc - ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id'; + lc.excluded_taxon_concept_ids +; - EXECUTE sql; +CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( + taxonomy taxonomies, + designation designations, + events_ids INT[] +) RETURNS void + LANGUAGE plpgsql +AS $rebuild_designation_all_listing_changes_mview$ + DECLARE + all_lc_table_name TEXT; + tmp_lc_table_name TEXT; + tc_table_name TEXT; + sql TEXT; + BEGIN + SELECT listing_changes_mview_name('all', designation.name, events_ids) + INTO all_lc_table_name; + SELECT listing_changes_mview_name('tmp', designation.name, events_ids) + INTO tmp_lc_table_name; - EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (designation_id, timeline_position, affected_taxon_concept_id)'; - EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (affected_taxon_concept_id, inclusion_taxon_concept_id)'; - EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (id, affected_taxon_concept_id)'; + SELECT LOWER(taxonomy.name) || '_taxon_concepts_and_ancestors_view' + INTO tc_table_name; - -- make the tree distance reflect distance from inclusion (Rhinopittecus roxellana) - sql := 'UPDATE ' || all_lc_table_name - || ' SET tree_distance = tc.tree_distance - FROM ' || all_lc_table_name || ' alc - JOIN ' || tc_table_name || ' tc - ON alc.inclusion_taxon_concept_id = tc.ancestor_taxon_concept_id - AND alc.affected_taxon_concept_id = tc.taxon_concept_id - WHERE alc.id = ' || all_lc_table_name || '.id - AND alc.affected_taxon_concept_id = ' || all_lc_table_name || '.affected_taxon_concept_id'; + -- First, build the temp table + EXECUTE format( + $format$ + CREATE TABLE %I AS + SELECT * + FROM all_listing_changes_view + WHERE designation_id = %L + %S + $format$, + tmp_lc_table_name, + designation.id, + CASE + WHEN array_length(events_ids, 1) > 0 + THEN format( + 'AND event_ids = %L::INT[]' + event_ids + ) + ELSE '' + END + ); - EXECUTE sql; + -- Then, drop the old table and swap in the temp table. + EXECUTE format( + $format$ + DROP TABLE IF EXISTS %1$I CASCADE; + ALTER %2$I RENAME TO %1$I; + $format$, + all_lc_table_name, + tmp_lc_table_name + ); + EXECUTE format( + $format$ + CREATE INDEX ON %1$I (taxon_concept_id) WHERE is_current; + CREATE INDEX ON %1$I (taxon_concept_id); + CREATE INDEX ON %1$I ( + taxon_concept_id, + is_current, + change_type_name, + inclusion_taxon_concept_id + ); + CREATE INDEX ON %1%I (designation_id, timeline_position, affected_taxon_concept_id) + CREATE INDEX ON %1%I (affected_taxon_concept_id, inclusion_taxon_concept_id) + CREATE INDEX ON %1%I (id, affected_taxon_concept_id) + CREATE INDEX ON %1$I (affected_taxon_concept_id, id); + $format$, + all_lc_table_name + ); END; - $$; +$rebuild_designation_all_listing_changes_mview$; - COMMENT ON FUNCTION rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations, events_ids INT[] - ) IS - 'Procedure to create a helper table with all listing changes - + their included / excluded populations - + tree distance between affected taxon concept and the taxon concept this listing change applies to.'; +COMMENT ON FUNCTION rebuild_designation_all_listing_changes_mview( + taxonomy taxonomies, + designation designations, + events_ids INT[] +) IS $comment$ +Procedure to create a helper table with all listing changes ++ their included / excluded populations ++ tree distance between affected taxon concept and the taxon concept this listing change applies to. +$comment$; From 9f4d04f306e9cc1c1eda5c63c90e53c691ef48ce Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Thu, 18 Dec 2025 11:48:49 +0000 Subject: [PATCH 05/15] chore: allow parens around multiline if statements --- .rubocop.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.rubocop.yml b/.rubocop.yml index 0d729d038..3e3cd3599 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -42,6 +42,10 @@ Layout/IndentationConsistency: Exclude: - 'db/migrate/*' +Style/ParenthesesAroundCondition: + Enabled: true + AllowInMultilineConditions: true + # Configuration parameters: EnforcedStyle, SupportedStyles. # SupportedStyles: symmetrical, new_line, same_line Layout/MultilineArrayBraceLayout: @@ -109,7 +113,6 @@ Layout/ExtraSpacing: Exclude: - 'db/migrate/*' - # Detect hard tabs, no hard tabs. Layout/IndentationStyle: Enabled: true From 649c60779cc6f4285cb6ff0d014228cf1d4ce798 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Thu, 18 Dec 2025 11:50:00 +0000 Subject: [PATCH 06/15] fix: effective_at should be set correctly on EXCLUSION listings --- app/models/listing_change.rb | 79 ++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/app/models/listing_change.rb b/app/models/listing_change.rb index 8bc47e2c2..1e5250cb7 100644 --- a/app/models/listing_change.rb +++ b/app/models/listing_change.rb @@ -171,6 +171,7 @@ def duplicates(comparison_attributes_override = {}) comparison_attributes.merge(comparison_attributes_override.symbolize_keys) ) ) + if party_listing_distribution relation = relation.includes(:party_listing_distribution).references(:party_listing_distribution).where( party_listing_distribution.comparison_conditions( @@ -178,11 +179,13 @@ def duplicates(comparison_attributes_override = {}) ) ) end + if annotation relation = relation.includes(:annotation).references(:annotation).where( annotation.comparison_conditions ) end + relation end @@ -223,22 +226,11 @@ def event_designation_mismatch end end - def listing_change_before_save_callback - # check if annotation should be deleted - if annotation && - annotation.short_note_en.blank? && - annotation.short_note_fr.blank? && - annotation.short_note_es.blank? && - annotation.full_note_en.blank? && - annotation.full_note_fr.blank? && - annotation.full_note_es.blank? - ann = annotation - self.annotation = nil - if ann.reload.listing_changes.empty? - ann.delete - end - end - + ## + # Called before save: if either excluded_geo_entities_ids or + # excluded_taxon_concepts_ids are set, create or replace ListingChanges with + # type `EXCEPTION` linked to this ListingChange (as the parent) accordingly. + def populate_exceptions_from_exclusions original_change_type = ChangeType.find(change_type_id) @excluded_geo_entities_ids = @excluded_geo_entities_ids && @@ -251,7 +243,6 @@ def listing_change_before_save_callback return self if @excluded_geo_entities_ids.nil? && @excluded_taxon_concepts_ids.nil? - new_exclusions = [] exclusion_change_type = ChangeType.find_by( name: ChangeType::EXCEPTION, designation_id: original_change_type.designation_id ) @@ -259,26 +250,62 @@ def listing_change_before_save_callback # geographic exclusions excluded_geo_entities = if @excluded_geo_entities_ids.present? - new_exclusions << ListingChange.new( - change_type_id: exclusion_change_type.id, - species_listing_id: species_listing_id, - taxon_concept_id: taxon_concept_id, - geo_entity_ids: @excluded_geo_entities_ids - ) + [ + ListingChange.new( + change_type_id: exclusion_change_type.id, + species_listing_id: species_listing_id, + taxon_concept_id: taxon_concept_id, + geo_entity_ids: @excluded_geo_entities_ids, + effective_at: effective_at + ) + ] + else + [] end # taxonomic exclusions excluded_taxon_concepts = if @excluded_taxon_concepts_ids.present? @excluded_taxon_concepts_ids.map do |id| - new_exclusions << ListingChange.new( + ListingChange.new( change_type_id: exclusion_change_type.id, species_listing_id: species_listing_id, - taxon_concept_id: id + taxon_concept_id: id, + effective_at: effective_at ) end + else + [] end - self.exclusions = new_exclusions + self.exclusions = excluded_taxon_concepts + excluded_geo_entities + + self + end + + ## + # Before save, check if annotation should be deleted + def delete_empty_annotation + if ( + annotation && + annotation.short_note_en.blank? && + annotation.short_note_fr.blank? && + annotation.short_note_es.blank? && + annotation.full_note_en.blank? && + annotation.full_note_fr.blank? && + annotation.full_note_es.blank? + ) + ann = annotation + self.annotation = nil + + if ann.reload.listing_changes.empty? + ann.delete + end + end + end + + def listing_change_before_save_callback + delete_empty_annotation + populate_exceptions_from_exclusions end end From 7e340b5ef511d8087538928e14589e1d2437fd10 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Fri, 19 Dec 2025 17:37:36 +0000 Subject: [PATCH 07/15] chore: indentation only --- .../005_rebuild_designation_listing_changes_mview.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db/mviews/005_rebuild_designation_listing_changes_mview.sql b/db/mviews/005_rebuild_designation_listing_changes_mview.sql index cc973e3b6..f981cfc36 100644 --- a/db/mviews/005_rebuild_designation_listing_changes_mview.sql +++ b/db/mviews/005_rebuild_designation_listing_changes_mview.sql @@ -35,15 +35,15 @@ CREATE OR REPLACE FUNCTION rebuild_designation_listing_changes_mview( deletion_id INT; BEGIN SELECT listing_changes_mview_name('all', designation.name, events_ids) - INTO all_lc_table_name; + INTO all_lc_table_name; SELECT listing_changes_mview_name('tmp', designation.name, events_ids) - INTO raw_lc_table_name; + INTO raw_lc_table_name; SELECT listing_changes_mview_name('tmp_cascaded', designation.name, events_ids) - INTO tmp_lc_table_name; + INTO tmp_lc_table_name; SELECT listing_changes_mview_name('child', designation.name, events_ids) - INTO lc_table_name; + INTO lc_table_name; SELECT listing_changes_mview_name(NULL, designation.name, events_ids) - INTO master_lc_table_name; + INTO master_lc_table_name; RAISE INFO 'Creating %', tmp_lc_table_name; From 541a2d2e5ab1e3be94d162753edbf29bffda3a4b Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Fri, 19 Dec 2025 17:38:04 +0000 Subject: [PATCH 08/15] wip: idea-refactor-cascade --- idea-refactor-cascade.sql | 636 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 636 insertions(+) create mode 100644 idea-refactor-cascade.sql diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql new file mode 100644 index 000000000..7c16c10d8 --- /dev/null +++ b/idea-refactor-cascade.sql @@ -0,0 +1,636 @@ +BEGIN; +DROP VIEW IF EXISTS taxon_ancestors_dv CASCADE; +CREATE OR REPLACE VIEW taxon_ancestors_dv AS + WITH RECURSIVE ancestries AS ( + -- start with the root nodes + SELECT + "taxonomy_id", + "id", + "rank_id", + '{}'::BIGINT[] AS "ancestor_ids" + FROM "taxon_concepts" roots + WHERE "parent_id" IS NULL + AND "name_status" = 'A' + UNION ALL + SELECT + "child"."taxonomy_id", + "child"."id", + "child"."rank_id", + "parent"."ancestor_ids" || ARRAY["parent"."id"::BIGINT] + FROM "ancestries" parent + JOIN "taxon_concepts" child + ON "child"."taxonomy_id" = "parent"."taxonomy_id" + AND "child"."parent_id" = "parent"."id" + ), taxon_ancestors AS ( + SELECT + "taxonomy_id", "id", "rank_id", + unnest(ancestor_ids) AS ancestor_id, + "ancestor_ids" + FROM "ancestries" + ), rank_depths AS ( + SELECT + "id" AS "rank_id", + ROW_NUMBER() OVER() AS "rank_depth" + FROM ( + SELECT ( + '{' || translate(taxonomic_position, '.', ',') || '}' + )::INT[], + * + FROM ranks + ORDER BY 1 + ) r + ), rank_distances AS ( + SELECT + "ancestor_rank"."rank_id" ancestor_rank_id, + "ancestor_rank"."rank_depth" ancestor_rank_depth, + "descendant_rank"."rank_id" descendant_rank_id, + "descendant_rank"."rank_depth" descendant_rank_depth, + "descendant_rank"."rank_depth" - "ancestor_rank"."rank_depth" AS rank_distance + FROM "rank_depths" ancestor_rank + JOIN "rank_depths" descendant_rank + ON "ancestor_rank"."rank_depth" <= "descendant_rank"."rank_depth" + ) + SELECT + "ta"."taxonomy_id", + "ta"."id", + "ta"."rank_id", + "ta"."ancestor_ids", + "ta"."ancestor_ids"[( + array_position("ta"."ancestor_ids", "ta"."ancestor_id") + ):] "path_ids", + "ta"."ancestor_id", + "ancestor_rank_id", + "ancestor_rank_depth", + "descendant_rank_depth" AS "rank_depth", + "rd"."rank_distance" + FROM "taxon_ancestors" ta + JOIN "taxon_concepts" atc + ON "ancestor_id" = "atc"."id" + JOIN "rank_distances" rd + ON "rd"."descendant_rank_id" = "ta"."rank_id" + AND "rd"."ancestor_rank_id" = "atc"."rank_id" +; + +DROP VIEW IF EXISTS implied_listing_changes_view CASCADE; +CREATE OR REPLACE VIEW implied_listing_changes_view AS +-- affected_taxon_concept is a taxon concept that is affected by this listing +-- change, even though it might not have an explicit connection to it +-- (i.e. it is an ancestor's listing change). +WITH designations_and_intervals AS ( + SELECT + designations.id designation_id, + designations.name designation_name, + designations.taxonomy_id taxonomy_id, + intervals.start_date interval_start_date, + intervals.end_date interval_end_date, + intervals.events_ids interval_events_ids + FROM designations + LEFT JOIN eu_regulations_applicability_view intervals + ON designations.name = 'EU' +), listing_changes_with_exceptions AS ( + -- the purpose of this CTE is to aggregate excluded taxon concept ids + SELECT + listing_changes.id, + change_types.designation_id, + change_types.name AS change_type_name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current, + ARRAY_AGG_NOTNULL(taxonomic_exceptions.taxon_concept_id) AS excluded_taxon_concept_ids + FROM listing_changes + LEFT JOIN listing_changes taxonomic_exceptions + ON listing_changes.id = taxonomic_exceptions.parent_id + AND listing_changes.taxon_concept_id != taxonomic_exceptions.taxon_concept_id + JOIN change_types ON change_types.id = listing_changes.change_type_id + JOIN designations_and_intervals + ON designations_and_intervals.designation_id = change_types.designation_id + AND ( + designations_and_intervals IS NULL + OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL + OR listing_changes.event_id = ANY(designations_and_intervals.interval_events_ids) + ) + GROUP BY + listing_changes.id, + change_types.designation_id, + change_types.name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current +), aggregate_lc AS ( +-- the purpose of this CTE is to aggregate listed and excluded populations + SELECT + lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + party_distribution.geo_entity_id AS party_id, + ARRAY_AGG_NOTNULL(listing_distributions.geo_entity_id) AS listed_geo_entities_ids, + ARRAY_AGG_NOTNULL(excluded_distributions.geo_entity_id) AS excluded_geo_entities_ids + FROM listing_changes_with_exceptions lc + LEFT JOIN listing_distributions + ON lc.id = listing_distributions.listing_change_id + AND NOT listing_distributions.is_party + LEFT JOIN listing_distributions party_distribution + ON lc.id = party_distribution.listing_change_id + AND party_distribution.is_party + LEFT JOIN listing_changes population_exceptions + ON lc.id = population_exceptions.parent_id + AND lc.taxon_concept_id = population_exceptions.taxon_concept_id + LEFT JOIN listing_distributions excluded_distributions + ON population_exceptions.id = excluded_distributions.listing_change_id + AND NOT excluded_distributions.is_party + GROUP BY + lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + party_distribution.geo_entity_id, + lc.excluded_taxon_concept_ids +) +SELECT + lc.*, + tc.taxon_concept_id AS affected_taxon_concept_id, + -- Make the tree distance reflect distance from inclusion + -- TODO TEST Rhinopittecus roxellana + COALESCE(itc.tree_distance, tc.tree_distance) tree_distance, + -- the following ROW_NUMBER call will assign chronological order to listing changes + -- in scope of the affected taxon concept and a particular designation + ROW_NUMBER() OVER ( + PARTITION BY + tc.taxon_concept_id, + designation_id, + -- fix Agapornis fischeri, which has + -- + -- CH R..W + -- LI R..W + lc.party_id + ORDER BY + effective_at, + CASE + WHEN change_type_name = 'DELETION' THEN 0 + WHEN change_type_name = 'RESERVATION_WITHDRAWAL' THEN 1 + WHEN change_type_name = 'ADDITION' THEN 2 + WHEN change_type_name = 'RESERVATION' THEN 3 + WHEN change_type_name = 'EXCEPTION' THEN 4 + END, + -- Before 2026 this was ascending, but we want e.g. species listings to take + -- priority over genus listings. + tc.tree_distance DESC + -- ??? OR would it be better to + -- COALESCE(itc.tree_distance, tc.tree_distance) tree_distance + )::INT AS timeline_position +FROM aggregate_lc lc +JOIN taxon_concepts_and_ancestors_mview tc + ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id +LEFT JOIN taxon_concepts_and_ancestors_mview itc + ON lc.inclusion_taxon_concept_id = itc.ancestor_taxon_concept_id + AND lc.taxon_concept_id = itc.taxon_concept_id +; + +DROP VIEW IF EXISTS taxon_concepts_with_distributions_and_ancestors CASCADE; +CREATE OR REPLACE VIEW taxon_concepts_with_distributions_and_ancestors AS +SELECT + tc.*, + (ancestor_fields->'kingdom_id')::INTEGER AS kingdom_id, + (ancestor_fields->'phylum_id')::INTEGER AS phylum_id, + (ancestor_fields->'class_id')::INTEGER AS class_id, + (ancestor_fields->'order_id')::INTEGER AS order_id, + (ancestor_fields->'family_id')::INTEGER AS family_id, + (ancestor_fields->'subfamily_id')::INTEGER AS subfamily_id, + (ancestor_fields->'genus_id')::INTEGER AS genus_id, + (ancestor_fields->'species_id')::INTEGER AS species_id, + (ancestor_fields->'subspecies_id')::INTEGER AS subspecies_id, + td.geo_entity_ids +FROM taxon_concepts tc +JOIN ( + SELECT + ta.id, + hstore( + array_agg(ARRAY[lower(r.name) || '_id', ancestor_id::text]) + ) AS ancestor_fields + FROM taxon_ancestors_dv ta + JOIN ranks r ON ta.ancestor_rank_id = r.id + GROUP BY ta.id +) ta ON tc.id = ta.id +JOIN ( + SELECT + taxon_concept_id "id", + array_agg(geo_entity_id) AS geo_entity_ids + FROM distributions d + GROUP BY taxon_concept_id +) td ON tc.id = td.id; + +DROP VIEW IF EXISTS applicable_listing_changes_timeline_view CASCADE; +CREATE OR REPLACE VIEW applicable_listing_changes_timeline_view AS +WITH RECURSIVE listing_changes_timeline AS ( + SELECT lc.id, + designation_id, + affected_taxon_concept_id AS original_taxon_concept_id, + taxon_concept_id AS current_taxon_concept_id, + CASE -- context + WHEN inclusion_taxon_concept_id IS NULL + THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) + ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) + END AS context, + inclusion_taxon_concept_id, + party_id, + species_listing_id, + change_type_id, + event_id, + effective_at, + is_current, + tree_distance AS context_tree_distance, + timeline_position, + CASE -- is_applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL + AND NOT listed_geo_entities_ids && taxon_concepts_mview.geo_entity_ids + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL + AND excluded_geo_entities_ids @> taxon_concepts_mview.geo_entity_ids + ) + THEN FALSE + WHEN ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND excluded_taxon_concept_ids && ARRAY[ + affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN FALSE + ELSE TRUE + END AS is_applicable + FROM implied_listing_changes_view lc + JOIN taxon_concepts_with_distributions_and_ancestors taxon_concepts_mview + ON lc.affected_taxon_concept_id = taxon_concepts_mview.id + WHERE timeline_position = 1 + -- AND lc.affected_taxon_concept_id = $1 + + UNION + + SELECT + hi.id, + hi.designation_id, + listing_changes_timeline.original_taxon_concept_id, + hi.taxon_concept_id, + CASE -- context + WHEN hi.inclusion_taxon_concept_id IS NOT NULL + AND ( + AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + OR listing_changes_timeline.context = ''::HSTORE + ) + THEN HSTORE(hi.species_listing_id::TEXT, hi.inclusion_taxon_concept_id::TEXT) + WHEN change_types.name = 'DELETION' + AND hi.taxon_concept_id = hi.affected_taxon_concept_id + THEN listing_changes_timeline.context - ARRAY[hi.species_listing_id::TEXT] + WHEN change_types.name = 'DELETION' + THEN listing_changes_timeline.context - HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- if it is a new listing at closer level that replaces an older listing, wipe out the context + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + AND hi.effective_at > listing_changes_timeline.effective_at + AND change_types.name = 'ADDITION' + THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- if it is a same day split listing we don''t want to wipe the other part of the split from the context + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + AND change_types.name = 'ADDITION' + THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance + AND hi.affected_taxon_concept_id = hi.taxon_concept_id + AND change_types.name = 'ADDITION' + THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + -- changing this to <= breaks Ursus arctos isabellinus + WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance + AND change_types.name = 'ADDITION' + THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + ELSE listing_changes_timeline.context + END AS context, + hi.inclusion_taxon_concept_id, + hi.party_id, + hi.species_listing_id, + hi.change_type_id, + hi.event_id, + hi.effective_at, + hi.is_current, + CASE -- context_tree_distance + WHEN ( + hi.inclusion_taxon_concept_id IS NOT NULL + AND AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + ) OR hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN hi.tree_distance + ELSE listing_changes_timeline.context_tree_distance + END AS context_tree_distance, + hi.timeline_position, + CASE -- is applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(hi.listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL + AND NOT hi.listed_geo_entities_ids && taxon_concepts_mview.geo_entity_ids + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(hi.excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL + AND hi.excluded_geo_entities_ids @> taxon_concepts_mview.geo_entity_ids + ) + THEN FALSE + WHEN ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND hi.excluded_taxon_concept_ids && ARRAY[ + hi.affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN FALSE + WHEN listing_changes_timeline.context -> hi.species_listing_id::TEXT = hi.taxon_concept_id::TEXT + OR hi.taxon_concept_id = listing_changes_timeline.original_taxon_concept_id + -- this line to make Moschus leucogaster happy + OR AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + THEN TRUE + WHEN listing_changes_timeline.context = ''::HSTORE --this would be the case when deleted + AND ( + ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + AND NOT hi.excluded_taxon_concept_ids && ARRAY[hi.affected_taxon_concept_id] + OR ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NULL + ) + AND hi.inclusion_taxon_concept_id IS NULL + AND hi.change_type_name = 'ADDITION' + THEN TRUE -- allows for re-listing + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN TRUE + ELSE FALSE + END AS is_applicable + FROM implied_listing_changes_view hi + JOIN listing_changes_timeline + ON hi.designation_id = listing_changes_timeline.designation_id + AND listing_changes_timeline.original_taxon_concept_id = hi.affected_taxon_concept_id + AND listing_changes_timeline.timeline_position + 1 = hi.timeline_position + JOIN change_types + ON hi.change_type_id = change_types.id + JOIN taxon_concepts_with_distributions_and_ancestors taxon_concepts_mview + ON hi.affected_taxon_concept_id = taxon_concepts_mview.id +) +SELECT * FROM listing_changes_timeline; + +drop table if exists tmp_all_listing_changes_timeline_matview; +drop table if exists applicable_listing_changes_timeline_dt; +drop table if exists applicable_listing_changes_timeline_mt; + +create materialized view applicable_listing_changes_timeline_mv + as select * from applicable_listing_changes_timeline_view +; + +create index on applicable_listing_changes_timeline_mv ( + current_taxon_concept_id, designation_id, change_type_id, party_id, effective_at +); + +create index on applicable_listing_changes_timeline_mv ( + current_taxon_concept_id, change_type_id, effective_at +); + +create index on applicable_listing_changes_timeline_mv ( + species_listing_id, current_taxon_concept_id +); + +create index on applicable_listing_changes_timeline_mv ( + current_taxon_concept_id, designation_id, party_id +); + +-- explain analyse +-- create table tmp_all_listing_changes_timeline_dt +-- as select * from applicable_listing_changes_timeline_view; + +--- 255s +--- 909344 +--- cites_listing_changes_mview + eu_listing_changes_mview + cms_listing_changes_mview are 1165459 +--- close but not quite + +DROP VIEW IF EXISTS all_listing_changes_and_synthetics_view; +-- TODO: Why is this not applied to CMS? +CREATE OR REPLACE VIEW all_listing_changes_and_synthetics_view ( + -- want to make sure this is the same set of columns as the previous view + "id", + "designation_id", + "original_taxon_concept_id", + "current_taxon_concept_id", + "context", + "inclusion_taxon_concept_id", + "party_id", + "species_listing_id", + "change_type_id", + "event_id", + "effective_at", + "is_current" + "context_tree_distance", + "timeline_position", + "is_applicable", + -- plus a few + "explicit_change", + "show_in_timeline", + "show_in_downloads", + "show_in_history" +) AS +-- find inherited listing changes superceded by own listing changes +-- mark them as not current in context of the child and add fake deletion records +-- so that those inherited events are terminated properly on the timelines +WITH addition_change_types AS ( + SELECT * + FROM "change_types" + WHERE "name" = 'ADDITION' +), deletion_change_types AS ( + SELECT * + FROM "change_types" + WHERE "name" = 'DELETION' +), exception_change_types AS ( + SELECT * + FROM "change_types" + WHERE "name" = 'EXCEPTION' +), prev_lc AS ( + SELECT + lc.id, + lc.designation_id, + lc.original_taxon_concept_id, + lc.current_taxon_concept_id, + lc.context, + lc.inclusion_taxon_concept_id, + lc.party_id, + lc.species_listing_id, + lc.change_type_id, + lc.event_id, + next_lc.effective_at, + FALSE AS is_current, + lc.context_tree_distance, + lc.timeline_position, + lc.is_applicable, + ( + lc.species_listing_id != next_lc.species_listing_id + ) AS appendix_change + FROM addition_change_types ct + JOIN applicable_listing_changes_timeline_mv lc + ON lc.change_type_id = ct.id + JOIN applicable_listing_changes_timeline_mv next_lc + ON lc.current_taxon_concept_id = next_lc.current_taxon_concept_id + AND lc.change_type_id = next_lc.change_type_id + AND lc.effective_at < next_lc.effective_at + AND next_lc.party_id IS NOT DISTINCT FROM lc.party_id + WHERE ( + ( + -- own listing change preceded by inherited listing change + next_lc.original_taxon_concept_id = next_lc.current_taxon_concept_id + AND lc.original_taxon_concept_id != lc.current_taxon_concept_id + ) OR ( + -- own listing change preceded by own listing change if it is a not current inclusion + next_lc.original_taxon_concept_id = next_lc.current_taxon_concept_id + AND lc.original_taxon_concept_id = lc.current_taxon_concept_id + AND lc.inclusion_taxon_concept_id IS NOT NULL + AND NOT lc.is_current + ) OR ( + -- inherited listing change preceded by inherited listing change + next_lc.original_taxon_concept_id != next_lc.current_taxon_concept_id + AND lc.original_taxon_concept_id != lc.current_taxon_concept_id + ) OR ( + -- inherited listing change preceded by own listing change if it is a not current inclusion + -- in the same taxon concept as the current listing change + next_lc.original_taxon_concept_id != next_lc.current_taxon_concept_id + AND lc.original_taxon_concept_id = lc.current_taxon_concept_id + AND lc.inclusion_taxon_concept_id IS NOT NULL + AND ( + lc.inclusion_taxon_concept_id = next_lc.original_taxon_concept_id + OR NOT lc.is_current + ) + ) + ) +), fake_deletions AS ( + -- note: this generates records without an id + -- this is ok for the timelines, and those records are not used elsewhere + -- ids in this view are not unique anyway, since any id + -- from listing changes can occur multiple times + SELECT + -- TODO: test if multiple appendix changes work + DISTINCT ON ( + lc.original_taxon_concept_id, + lc.current_taxon_concept_id, + lc.designation_id, + lc.species_listing_id, + lc.party_id + ) + 0 - lc.id AS id, + lc.designation_id AS designation_id, + lc.original_taxon_concept_id AS original_taxon_concept_id, + lc.current_taxon_concept_id AS current_taxon_concept_id, + ''::hstore AS context, + NULL::INT AS inclusion_taxon_concept_id, + lc.party_id AS party_id, + lc.species_listing_id AS species_listing_id, + ct.id AS change_type_id, + lc.event_id AS event_id, + lc.effective_at AS effective_at, + TRUE AS is_current, + lc.context_tree_distance AS context_tree_distance, + lc.timeline_position AS timeline_position, + TRUE AS is_applicable, + FALSE AS explicit_change, + TRUE AS show_in_timeline, + FALSE AS show_in_downloads, + FALSE AS show_in_history + FROM prev_lc lc + JOIN deletion_change_types ct + ON ct.designation_id = lc.designation_id + WHERE appendix_change +) +-- SELECT +-- lc.id, +-- lc.designation_id, +-- lc.original_taxon_concept_id, +-- lc.current_taxon_concept_id, +-- lc.context, +-- lc.inclusion_taxon_concept_id, +-- lc.party_id, +-- lc.species_listing_id, +-- lc.change_type_id, +-- lc.event_id, +-- lc.effective_at, +-- CASE +-- WHEN terminated_lc.id IS NOT NULL THEN TRUE +-- ELSE lc.is_current +-- END AS is_current, +-- lc.context_tree_distance, +-- lc.timeline_position, +-- lc.is_applicable, +-- TRUE AS explicit_change, +-- xct.id IS NULL AS show_in_timeline, +-- xct.id IS NULL AS show_in_history, +-- xct.id IS NULL AS show_in_downloads +-- FROM applicable_listing_changes_timeline_mv lc +-- -- if the row exists in prev_lc then it has been superseded +-- LEFT JOIN prev_lc terminated_lc +-- ON terminated_lc.id = lc.id +-- AND terminated_lc.current_taxon_concept_id = lc.current_taxon_concept_id +-- LEFT JOIN exception_change_types xct +-- ON lc.change_type_id = xct.id +-- UNION ALL +SELECT + id, + designation_id, + original_taxon_concept_id, + current_taxon_concept_id, + context, + inclusion_taxon_concept_id, + party_id, + species_listing_id, + change_type_id, + event_id, + effective_at, + is_current, + context_tree_distance, + timeline_position, + is_applicable, + explicit_change, + show_in_timeline, + show_in_history, + show_in_downloads +FROM fake_deletions +; + + +drop table if exists synth_listing_changes_timeline_dt; +explain analyse +create table synth_listing_changes_timeline_dt +as select * from all_listing_changes_and_synthetics_view where designation_id = 1; + From 574620cdd5e9ce12cc30ad820d73620e15190bdc Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Mon, 16 Feb 2026 08:53:12 +0000 Subject: [PATCH 09/15] wipnoideawhy --- app/models/document.rb | 42 +++++++++++++++++++-------------------- app/models/user.rb | 13 ++++++++++-- idea-refactor-cascade.sql | 30 ++++++++++++++++++---------- 3 files changed, 51 insertions(+), 34 deletions(-) diff --git a/app/models/document.rb b/app/models/document.rb index 5ef7d16cc..0d0e0bfe6 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -56,27 +56,27 @@ class Document < ApplicationRecord include PgSearch::Model ACCEPTED_CONTENT_TYPES = [ - "image/jpeg", # jpg - "image/jpeg", # jpeg - "image/gif", # gif - "image/png", # png - "image/bmp", # bmp - "image/tiff", # tif - "image/tiff", # tiff - "application/vnd.ms-powerpoint", # ppt - "application/vnd.openxmlformats-officedocument.presentationml.presentation", # pptx - "application/vnd.ms-excel", # xls - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # xlsx - "application/rtf", # rtf - "text/plain", # txt - "application/msword", # doc - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # docx - "application/pdf", # pdf - "text/csv", # csv - "text/tab-separated-values", # tsv - "application/vnd.oasis.opendocument.text", # odt - "application/vnd.oasis.opendocument.spreadsheet", # ods - "application/vnd.oasis.opendocument.presentation" # odp + 'image/jpeg', # jpg + 'image/jpeg', # jpeg + 'image/gif', # gif + 'image/png', # png + 'image/bmp', # bmp + 'image/tiff', # tif + 'image/tiff', # tiff + 'application/vnd.ms-powerpoint', # ppt + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', # pptx + 'application/vnd.ms-excel', # xls + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # xlsx + 'application/rtf', # rtf + 'text/plain', # txt + 'application/msword', # doc + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # docx + 'application/pdf', # pdf + 'text/csv', # csv + 'text/tab-separated-values', # tsv + 'application/vnd.oasis.opendocument.text', # odt + 'application/vnd.oasis.opendocument.spreadsheet', # ods + 'application/vnd.oasis.opendocument.presentation' # odp ].freeze pg_search_scope :search_by_title, against: :title, diff --git a/app/models/user.rb b/app/models/user.rb index 66fcf5e2c..956ac9532 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -116,11 +116,13 @@ def can_be_deleted? Reference, TaxonConceptReference, DistributionReference, Trade::AnnualReportUpload, Trade::Shipment ] + for i in 0..tracked_objects.length - 1 if tracked_objects[i].where([ 'created_by_id = :id OR updated_by_id = :id', id: self.id ]).limit(1).count > 0 return false end end + true end @@ -150,19 +152,25 @@ def send_devise_notification(notification, *) def sync_with_captive_breeding_db # Only interested if role, name, encrypted_password, and email is changed. # Or user deleted. - return unless (previous_changes.keys & %w[email role name encrypted_password]).present? || destroyed? + return unless + previous_changes.keys.intersect?( + %w[email role name encrypted_password] + ).present? || destroyed? role_was = previous_changes['role']&.first + action = if destroyed? # User record deleted. :delete elsif is_elibrary_user? || is_manager? # Is admin or elibrary. :create_or_update - elsif role_was == MANAGER || role_was == ELIBRARY_USER # Was admin or elibrary. + elsif role_was == MANAGER || role_was == ELIBRARY_USER # rubocop:disable Lint/DuplicateBranch + # Was admin or elibrary, but (because previous condition not met), is not any more :delete else :none end + return if action == :none email_was = previous_changes['email']&.first @@ -180,6 +188,7 @@ def sync_with_captive_breeding_db CaptiveBreedingUser.create!(email:, name:, encrypted_password:) else # Update the first CB user record, which is using the new email address (if changed). existing_cb_users.first.update!(email:, name:, encrypted_password:) + if existing_cb_users[1].present? # Duplicate user!? Remove it? # TODO: Do not have requirement for this yet, not sure is it safe to delete. # https://unep-wcmc.codebasehq.com/projects/cites-support-maintenance/tickets/241 diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index 7c16c10d8..294324a3a 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -248,17 +248,21 @@ WITH RECURSIVE listing_changes_timeline AS ( designation_id, affected_taxon_concept_id AS original_taxon_concept_id, taxon_concept_id AS current_taxon_concept_id, - CASE -- context - WHEN inclusion_taxon_concept_id IS NULL - THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) - ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) - END AS context, inclusion_taxon_concept_id, party_id, species_listing_id, change_type_id, event_id, effective_at, + CASE -- context + WHEN inclusion_taxon_concept_id IS NULL + THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) + ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) + END AS context, + CASE WHEN + THEN + ELSE + HSTORE(tree_distance::TEXT, lc.id::TEXT) AS listing_change_ids_by_distance, is_current, tree_distance AS context_tree_distance, timeline_position, @@ -304,6 +308,12 @@ WITH RECURSIVE listing_changes_timeline AS ( hi.designation_id, listing_changes_timeline.original_taxon_concept_id, hi.taxon_concept_id, + hi.inclusion_taxon_concept_id, + hi.party_id, + hi.species_listing_id, + hi.change_type_id, + hi.event_id, + hi.effective_at, CASE -- context WHEN hi.inclusion_taxon_concept_id IS NOT NULL AND ( @@ -335,12 +345,10 @@ WITH RECURSIVE listing_changes_timeline AS ( THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) ELSE listing_changes_timeline.context END AS context, - hi.inclusion_taxon_concept_id, - hi.party_id, - hi.species_listing_id, - hi.change_type_id, - hi.event_id, - hi.effective_at, + listing_changes_timeline + listing_changes_timeline.listing_change_ids_by_distance || HSTORE( + tree_distance::TEXT, lc.id::TEXT + ) AS listing_change_ids_by_distance, hi.is_current, CASE -- context_tree_distance WHEN ( From f5f0f0d5d3f8c3df47e776c4546d19c4e2d6e3d5 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Mon, 16 Mar 2026 15:57:55 +0000 Subject: [PATCH 10/15] nb query plan --- idea-refactor-cascade.sql | 53 +++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index 294324a3a..ca861fc80 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -259,10 +259,10 @@ WITH RECURSIVE listing_changes_timeline AS ( THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) END AS context, - CASE WHEN - THEN - ELSE - HSTORE(tree_distance::TEXT, lc.id::TEXT) AS listing_change_ids_by_distance, + -- CASE WHEN + -- THEN + -- ELSE + HSTORE(tree_distance::TEXT, (lc.id)::TEXT) AS listing_change_ids_by_distance, is_current, tree_distance AS context_tree_distance, timeline_position, @@ -345,9 +345,9 @@ WITH RECURSIVE listing_changes_timeline AS ( THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) ELSE listing_changes_timeline.context END AS context, - listing_changes_timeline + -- listing_changes_timeline, listing_changes_timeline.listing_change_ids_by_distance || HSTORE( - tree_distance::TEXT, lc.id::TEXT + tree_distance::TEXT, listing_changes_timeline.id::TEXT ) AS listing_change_ids_by_distance, hi.is_current, CASE -- context_tree_distance @@ -464,7 +464,7 @@ CREATE OR REPLACE VIEW all_listing_changes_and_synthetics_view ( "change_type_id", "event_id", "effective_at", - "is_current" + "is_current", "context_tree_distance", "timeline_position", "is_applicable", @@ -642,3 +642,42 @@ explain analyse create table synth_listing_changes_timeline_dt as select * from all_listing_changes_and_synthetics_view where designation_id = 1; + +-- CTE Scan on fake_deletions (cost=2358.58..2358.61 rows=1 width=86) (actual time=359431.642..359435.274 rows=28 loops=1) +-- Filter: (designation_id = 1) +-- Rows Removed by Filter: 18 +-- CTE addition_change_types +-- -> Seq Scan on change_types (cost=0.00..1.19 rows=1 width=636) (actual time=0.007..0.011 rows=3 loops=1) +-- Filter: ((name)::text = 'ADDITION'::text) +-- Rows Removed by Filter: 12 +-- CTE deletion_change_types +-- -> Seq Scan on change_types change_types_1 (cost=0.00..1.19 rows=1 width=636) (actual time=0.012..0.014 rows=3 loops=1) +-- Filter: ((name)::text = 'DELETION'::text) +-- Rows Removed by Filter: 12 +-- CTE prev_lc +-- -> Nested Loop (cost=0.45..2356.13 rows=1 width=83) (actual time=0.143..162253.135 rows=335815170 loops=1) +-- -> Hash Join (cost=0.03..2153.73 rows=366 width=86) (actual time=0.029..150.562 rows=63307 loops=1) +-- Hash Cond: (lc.change_type_id = ct.id) +-- -> Seq Scan on applicable_listing_changes_timeline_mv lc (cost=0.00..1875.39 rows=73239 width=82) (actual time=0.007..31.230 rows=73239 loops=1) +-- -> Hash (cost=0.02..0.02 rows=1 width=4) (actual time=0.014..0.015 rows=3 loops=1) +-- Buckets: 1024 Batches: 1 Memory Usage: 9kB +-- -> CTE Scan on addition_change_types ct (cost=0.00..0.02 rows=1 width=4) (actual time=0.008..0.012 rows=3 loops=1) +-- -> Index Scan using applicable_listing_changes_ti_current_taxon_concept_id_chan_idx on applicable_listing_changes_timeline_mv next_lc (cost=0.42..0.54 rows=1 width=24) (actual time=0.016..1.573 rows=5305 loops=63307) +-- Index Cond: ((current_taxon_concept_id = lc.current_taxon_concept_id) AND (change_type_id = lc.change_type_id) AND (lc.effective_at < effective_at)) +-- Filter: ((NOT (party_id IS DISTINCT FROM lc.party_id)) AND (((original_taxon_concept_id = current_taxon_concept_id) AND (lc.original_taxon_concept_id <> lc.current_taxon_concept_id)) OR ((original_taxon_concept_id = current_taxon_concept_id) AND (lc.original_taxon_concept_id = lc.current_taxon_concept_id) AND (lc.inclusion_taxon_concept_id IS NOT NULL) AND (NOT lc.is_current)) OR ((original_taxon_concept_id <> current_taxon_concept_id) AND (lc.original_taxon_concept_id <> lc.current_taxon_concept_id)) OR ((original_taxon_concept_id <> current_taxon_concept_id) AND (lc.original_taxon_concept_id = lc.current_taxon_concept_id) AND (lc.inclusion_taxon_concept_id IS NOT NULL) AND ((lc.inclusion_taxon_concept_id = original_taxon_concept_id) OR (NOT lc.is_current))))) +-- Rows Removed by Filter: 44 +-- CTE fake_deletions +-- -> Unique (cost=0.07..0.08 rows=1 width=86) (actual time=359431.636..359435.238 rows=46 loops=1) +-- -> Sort (cost=0.07..0.07 rows=1 width=86) (actual time=359431.634..359432.354 rows=13415 loops=1) +-- Sort Key: lc_1.original_taxon_concept_id, lc_1.current_taxon_concept_id, lc_1.designation_id, lc_1.species_listing_id, lc_1.party_id +-- Sort Method: quicksort Memory: 2271kB +-- -> Nested Loop (cost=0.00..0.06 rows=1 width=86) (actual time=11359.141..359426.368 rows=13415 loops=1) +-- Join Filter: (lc_1.designation_id = ct_1.designation_id) +-- Rows Removed by Join Filter: 26830 +-- -> CTE Scan on prev_lc lc_1 (cost=0.00..0.02 rows=1 width=40) (actual time=11359.123..359415.743 rows=13415 loops=1) +-- Filter: appendix_change +-- Rows Removed by Filter: 335801755 +-- -> CTE Scan on deletion_change_types ct_1 (cost=0.00..0.02 rows=1 width=8) (actual time=0.000..0.000 rows=3 loops=13415) +-- Planning time: 1.459 ms +-- Execution time: 360161.079 ms +-- (37 rows) \ No newline at end of file From 37126d08487fb696a940fbe92af9ca43df8182f1 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Wed, 18 Mar 2026 10:08:14 +0000 Subject: [PATCH 11/15] idea-refactor-cascade --- idea-refactor-cascade.sql | 824 ++++++++++++++++---------------------- 1 file changed, 338 insertions(+), 486 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index ca861fc80..97535ead2 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -71,6 +71,43 @@ CREATE OR REPLACE VIEW taxon_ancestors_dv AS AND "rd"."ancestor_rank_id" = "atc"."rank_id" ; +create materialized view taxon_ancestors_mv + as select * from taxon_ancestors_dv +; + +create index on taxon_ancestors_mv ( + id, ancestor_id +); + +create index on taxon_ancestors_mv ( + id, rank_distance +); + +create index on taxon_ancestors_mv ( + id, ancestor_rank_depth +); + +create index on taxon_ancestors_mv ( + id, ancestor_rank_id +); + +DROP VIEW IF EXISTS change_types_view CASCADE; +CREATE OR REPLACE VIEW change_types_view AS +SELECT + ct.*, + CASE + WHEN ct.name = 'RESERVATION_WITHDRAWAL' THEN 1 + WHEN ct.name = 'DELETION' THEN 2 + WHEN ct.name = 'EXCEPTION' THEN 3 + WHEN ct.name = 'ADDITION' THEN 4 + WHEN ct.name = 'RESERVATION' THEN 5 + END change_type_rank + CASE + WHEN ct.name IN ('ADDITION', 'DELETION', 'EXCEPTION') THEN 1 + ELSE 2 + END change_type_group_id -- A/D/X, R/W +FROM change_types ct; + DROP VIEW IF EXISTS implied_listing_changes_view CASCADE; CREATE OR REPLACE VIEW implied_listing_changes_view AS -- affected_taxon_concept is a taxon concept that is affected by this listing @@ -87,53 +124,59 @@ WITH designations_and_intervals AS ( FROM designations LEFT JOIN eu_regulations_applicability_view intervals ON designations.name = 'EU' -), listing_changes_with_exceptions AS ( +), listing_changes_with_exclusions AS ( -- the purpose of this CTE is to aggregate excluded taxon concept ids SELECT - listing_changes.id, - change_types.designation_id, - change_types.name AS change_type_name, - listing_changes.taxon_concept_id, - listing_changes.species_listing_id, - listing_changes.change_type_id, - listing_changes.inclusion_taxon_concept_id, - listing_changes.event_id, - listing_changes.effective_at::DATE, - listing_changes.is_current, - ARRAY_AGG_NOTNULL(taxonomic_exceptions.taxon_concept_id) AS excluded_taxon_concept_ids - FROM listing_changes - LEFT JOIN listing_changes taxonomic_exceptions - ON listing_changes.id = taxonomic_exceptions.parent_id - AND listing_changes.taxon_concept_id != taxonomic_exceptions.taxon_concept_id - JOIN change_types ON change_types.id = listing_changes.change_type_id + lc.id, + ct.designation_id, + interval_events_ids, + lc.taxon_concept_id, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + ct.id AS change_type_id, + ct.change_type_rank AS change_type_rank, + ct.name AS change_type_name, + lc.event_id, + lc.effective_at::DATE, + lc.is_current, + ARRAY_AGG_NOTNULL(taxonomic_exclusions.taxon_concept_id) AS excluded_taxon_concept_ids + FROM listing_changes lc + LEFT JOIN listing_changes taxonomic_exclusions + ON lc.id = taxonomic_exclusions.parent_id + AND lc.taxon_concept_id != taxonomic_exclusions.taxon_concept_id + JOIN change_types_view ct ON ct.id = lc.change_type_id JOIN designations_and_intervals - ON designations_and_intervals.designation_id = change_types.designation_id + ON designations_and_intervals.designation_id = ct.designation_id AND ( designations_and_intervals IS NULL OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL - OR listing_changes.event_id = ANY(designations_and_intervals.interval_events_ids) + OR lc.event_id = ANY(designations_and_intervals.interval_events_ids) ) GROUP BY - listing_changes.id, - change_types.designation_id, - change_types.name, - listing_changes.taxon_concept_id, - listing_changes.species_listing_id, - listing_changes.change_type_id, - listing_changes.inclusion_taxon_concept_id, - listing_changes.event_id, - listing_changes.effective_at::DATE, - listing_changes.is_current + lc.id, + ct.designation_id, + interval_events_ids, + lc.taxon_concept_id, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + ct.id, + ct.name, + ct.change_type_rank, + lc.event_id, + lc.effective_at::DATE, + lc.is_current ), aggregate_lc AS ( -- the purpose of this CTE is to aggregate listed and excluded populations SELECT lc.id, lc.designation_id, - lc.change_type_name, + lc.interval_events_ids, lc.taxon_concept_id, lc.species_listing_id, - lc.change_type_id, lc.inclusion_taxon_concept_id, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, lc.event_id, lc.effective_at, lc.is_current, @@ -141,77 +184,238 @@ WITH designations_and_intervals AS ( party_distribution.geo_entity_id AS party_id, ARRAY_AGG_NOTNULL(listing_distributions.geo_entity_id) AS listed_geo_entities_ids, ARRAY_AGG_NOTNULL(excluded_distributions.geo_entity_id) AS excluded_geo_entities_ids - FROM listing_changes_with_exceptions lc + FROM listing_changes_with_exclusions lc LEFT JOIN listing_distributions ON lc.id = listing_distributions.listing_change_id AND NOT listing_distributions.is_party LEFT JOIN listing_distributions party_distribution ON lc.id = party_distribution.listing_change_id AND party_distribution.is_party - LEFT JOIN listing_changes population_exceptions - ON lc.id = population_exceptions.parent_id - AND lc.taxon_concept_id = population_exceptions.taxon_concept_id + LEFT JOIN listing_changes population_exclusions + ON lc.id = population_exclusions.parent_id + AND lc.taxon_concept_id = population_exclusions.taxon_concept_id LEFT JOIN listing_distributions excluded_distributions - ON population_exceptions.id = excluded_distributions.listing_change_id + ON population_exclusions.id = excluded_distributions.listing_change_id AND NOT excluded_distributions.is_party GROUP BY lc.id, lc.designation_id, - lc.change_type_name, + lc.interval_events_ids, lc.taxon_concept_id, lc.species_listing_id, - lc.change_type_id, lc.inclusion_taxon_concept_id, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, lc.event_id, lc.effective_at, lc.is_current, party_distribution.geo_entity_id, lc.excluded_taxon_concept_ids +), addition_groups AS ( + SELECT + DISTINCT ON ( + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE + ) + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE AS effective_at, + hstore( + array_agg(ARRAY[species_listing_id, lc.id]::TEXT[]) FILTER ( + WHERE lc.change_type_name = 'ADDITION' + ) OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.effective_at::DATE + ORDER BY + species_listing_id + ) + ) AS additions_by_listing_id, + hstore( + array_agg(ARRAY[species_listing_id, lc.id]::TEXT[]) FILTER ( + WHERE lc.change_type_name = 'DELETION' + ) OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.effective_at::DATE + ORDER BY + species_listing_id + ) + ) AS deletions_by_listing_id, + CASE WHEN lc.change_type_name = 'ADDITION' + THEN dense_rank() OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.change_type_name + ORDER BY + lc.effective_at::DATE + )::INT + END AS addition_group_rank, + CASE WHEN lc.change_type_name IN ('ADDITION', 'DELETION') + THEN dense_rank() OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id + ORDER BY + lc.effective_at::DATE + )::INT + END AS add_del_group_rank + FROM aggregate_lc lc + WHERE lc.change_type_name IN ('ADDITION', 'DELETION') + ORDER BY + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE +), synthetic_deletions_needed AS ( + -- TODO: make this recursive and stateful as we cannot rely on additions. + -- OR create synthetic additions instead? + SELECT DISTINCT + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.party_id, + ag.addition_group_rank, + ag.effective_at, + unnest(akeys(deletions_by_listing_id))::BIGINT AS species_listing_id, + unnest(avals(deletions_by_listing_id))::BIGINT AS deleted_listing_change_id + FROM ( + SELECT + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.addition_group_rank, + ag.party_id, + ag.effective_at::DATE AS effective_at, + ( + prev_ag.additions_by_listing_id + ) - COALESCE( + akeys(ag.deletions_by_listing_id), '{}'::text[] + ) - COALESCE( + array_agg( + (SELECT key FROM each(dg.deletions_by_listing_id)) + ) OVER ( + PARTITION BY + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.party_id, + ag.addition_group_rank + ), + '{}'::text[] + ) AS deletions_by_listing_id + FROM addition_groups ag + JOIN addition_groups prev_ag + ON ag.designation_id = prev_ag.designation_id + AND ag.interval_events_ids IS NOT DISTINCT FROM prev_ag.interval_events_ids + AND ag.taxon_concept_id = prev_ag.taxon_concept_id + AND ag.party_id IS NOT DISTINCT FROM prev_ag.party_id + AND ag.addition_group_rank = prev_ag.addition_group_rank + 1 + LEFT JOIN addition_groups dg + ON ag.designation_id = dg.designation_id + AND ag.interval_events_ids IS NOT DISTINCT FROM dg.interval_events_ids + AND ag.taxon_concept_id = dg.taxon_concept_id + AND ag.party_id IS NOT DISTINCT FROM dg.party_id + AND ag.add_del_group_rank > dg.add_del_group_rank + AND prev_ag.add_del_group_rank < dg.add_del_group_rank + WHERE NOT all_additions_are_inclusions + ) AS ag ) SELECT - lc.*, - tc.taxon_concept_id AS affected_taxon_concept_id, + 0 - ag.deleted_listing_change_id AS id, + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.species_listing_id, + NULL AS inclusion_taxon_concept_id, + ct.id AS change_type_id, + ct.name AS change_type_name, + ct.change_type_rank AS change_type_rank, + NULL AS event_id, + ag.effective_at AS effective_at, + FALSE AS is_current, + '{}' AS excluded_taxon_concept_ids, + ag.party_id AS party_id, + '{}' AS listed_geo_entities_ids, + '{}' AS excluded_geo_entities_ids +FROM fake_deletions_needed ag +JOIN change_types_view ct + ON ct.designation_id = ag.designation_id + AND ct.name = 'DELETION' +UNION ALL +SELECT * FROM aggregate_lc; + +DROP VIEW IF EXISTS inherited_listing_changes_view CASCADE; +CREATE OR REPLACE VIEW inherited_listing_changes_view AS +SELECT + lc.id, + tc.id AS taxon_concept_id, + lc.taxon_concept_id AS original_taxon_concept_id, -- Make the tree distance reflect distance from inclusion -- TODO TEST Rhinopittecus roxellana - COALESCE(itc.tree_distance, tc.tree_distance) tree_distance, + COALESCE(itc.rank_distance, tc.rank_distance) rank_distance, + lc.designation_id, + lc.interval_events_ids, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + lc.party_id, + lc.listed_geo_entities_ids, + lc.excluded_geo_entities_ids, -- the following ROW_NUMBER call will assign chronological order to listing changes -- in scope of the affected taxon concept and a particular designation ROW_NUMBER() OVER ( PARTITION BY - tc.taxon_concept_id, - designation_id, + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, -- fix Agapornis fischeri, which has -- -- CH R..W -- LI R..W lc.party_id ORDER BY - effective_at, - CASE - WHEN change_type_name = 'DELETION' THEN 0 - WHEN change_type_name = 'RESERVATION_WITHDRAWAL' THEN 1 - WHEN change_type_name = 'ADDITION' THEN 2 - WHEN change_type_name = 'RESERVATION' THEN 3 - WHEN change_type_name = 'EXCEPTION' THEN 4 - END, - -- Before 2026 this was ascending, but we want e.g. species listings to take - -- priority over genus listings. - tc.tree_distance DESC - -- ??? OR would it be better to - -- COALESCE(itc.tree_distance, tc.tree_distance) tree_distance + lc.effective_at, + lc.change_type_rank, + tc.rank_distance DESC )::INT AS timeline_position -FROM aggregate_lc lc -JOIN taxon_concepts_and_ancestors_mview tc - ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id -LEFT JOIN taxon_concepts_and_ancestors_mview itc - ON lc.inclusion_taxon_concept_id = itc.ancestor_taxon_concept_id - AND lc.taxon_concept_id = itc.taxon_concept_id +FROM implied_listing_changes_view lc +JOIN taxon_ancestors_mv tc + ON lc.taxon_concept_id = tc.ancestor_id +LEFT JOIN taxon_ancestors_mv itc + ON lc.inclusion_taxon_concept_id = itc.ancestor_id + AND lc.taxon_concept_id = itc.id ; DROP VIEW IF EXISTS taxon_concepts_with_distributions_and_ancestors CASCADE; CREATE OR REPLACE VIEW taxon_concepts_with_distributions_and_ancestors AS SELECT tc.*, + AVALS(ancestor_fields)::INTEGER[] || tc.id AS ancestor_ids, (ancestor_fields->'kingdom_id')::INTEGER AS kingdom_id, (ancestor_fields->'phylum_id')::INTEGER AS phylum_id, (ancestor_fields->'class_id')::INTEGER AS class_id, @@ -228,7 +432,10 @@ JOIN ( ta.id, hstore( array_agg(ARRAY[lower(r.name) || '_id', ancestor_id::text]) - ) AS ancestor_fields + ) AS ancestor_fields, + hstore( + array_agg(ARRAY[rank_distance::text, ancestor_id::text]) + ) AS ancestor_id_by_distance FROM taxon_ancestors_dv ta JOIN ranks r ON ta.ancestor_rank_id = r.id GROUP BY ta.id @@ -241,443 +448,88 @@ JOIN ( GROUP BY taxon_concept_id ) td ON tc.id = td.id; -DROP VIEW IF EXISTS applicable_listing_changes_timeline_view CASCADE; -CREATE OR REPLACE VIEW applicable_listing_changes_timeline_view AS -WITH RECURSIVE listing_changes_timeline AS ( - SELECT lc.id, - designation_id, - affected_taxon_concept_id AS original_taxon_concept_id, - taxon_concept_id AS current_taxon_concept_id, - inclusion_taxon_concept_id, - party_id, - species_listing_id, - change_type_id, - event_id, - effective_at, - CASE -- context - WHEN inclusion_taxon_concept_id IS NULL - THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) - ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) - END AS context, - -- CASE WHEN - -- THEN - -- ELSE - HSTORE(tree_distance::TEXT, (lc.id)::TEXT) AS listing_change_ids_by_distance, - is_current, - tree_distance AS context_tree_distance, - timeline_position, - CASE -- is_applicable - WHEN ( - -- there are listed populations - ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL - -- and the taxon has its own distribution and does not occur in any of them - AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL - AND NOT listed_geo_entities_ids && taxon_concepts_mview.geo_entity_ids - ) OR ( - -- when all populations are excluded - ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL - AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL - AND excluded_geo_entities_ids @> taxon_concepts_mview.geo_entity_ids - ) - THEN FALSE - WHEN ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL - -- if taxon or any of its ancestors is excluded from this listing - AND excluded_taxon_concept_ids && ARRAY[ - affected_taxon_concept_id, - taxon_concepts_mview.kingdom_id, - taxon_concepts_mview.phylum_id, - taxon_concepts_mview.class_id, - taxon_concepts_mview.order_id, - taxon_concepts_mview.family_id, - taxon_concepts_mview.genus_id, - taxon_concepts_mview.species_id - ] - THEN FALSE - ELSE TRUE - END AS is_applicable - FROM implied_listing_changes_view lc - JOIN taxon_concepts_with_distributions_and_ancestors taxon_concepts_mview - ON lc.affected_taxon_concept_id = taxon_concepts_mview.id - WHERE timeline_position = 1 - -- AND lc.affected_taxon_concept_id = $1 - UNION +DROP VIEW IF EXISTS applicable_implied_taxon_listing_changes_view CASCADE; +CREATE OR REPLACE VIEW applicable_implied_taxon_listing_changes_view AS +SELECT + lc.*, + ( + -- there are listed populations + ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(tc.geo_entity_ids, 1) IS NOT NULL + AND NOT listed_geo_entities_ids && tc.geo_entity_ids + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(tc.geo_entity_ids, 1) IS NOT NULL + AND excluded_geo_entities_ids @> tc.geo_entity_ids + ) AS is_geographically_excluded, + ( + ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND excluded_taxon_concept_ids && tc.ancestor_ids + ) AS is_taxonomically_excluded +FROM inherited_listing_changes_view lc +JOIN taxon_concepts_with_distributions_and_ancestors tc + ON lc.taxon_concept_id = tc.id; - SELECT - hi.id, - hi.designation_id, - listing_changes_timeline.original_taxon_concept_id, - hi.taxon_concept_id, - hi.inclusion_taxon_concept_id, - hi.party_id, - hi.species_listing_id, - hi.change_type_id, - hi.event_id, - hi.effective_at, - CASE -- context - WHEN hi.inclusion_taxon_concept_id IS NOT NULL - AND ( - AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] - OR listing_changes_timeline.context = ''::HSTORE - ) - THEN HSTORE(hi.species_listing_id::TEXT, hi.inclusion_taxon_concept_id::TEXT) - WHEN change_types.name = 'DELETION' - AND hi.taxon_concept_id = hi.affected_taxon_concept_id - THEN listing_changes_timeline.context - ARRAY[hi.species_listing_id::TEXT] - WHEN change_types.name = 'DELETION' - THEN listing_changes_timeline.context - HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) - -- if it is a new listing at closer level that replaces an older listing, wipe out the context - WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance - AND hi.effective_at > listing_changes_timeline.effective_at - AND change_types.name = 'ADDITION' - THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) - -- if it is a same day split listing we don''t want to wipe the other part of the split from the context - WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance - AND change_types.name = 'ADDITION' - THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) - WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance - AND hi.affected_taxon_concept_id = hi.taxon_concept_id - AND change_types.name = 'ADDITION' - THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) - -- changing this to <= breaks Ursus arctos isabellinus - WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance - AND change_types.name = 'ADDITION' - THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) - ELSE listing_changes_timeline.context - END AS context, - -- listing_changes_timeline, - listing_changes_timeline.listing_change_ids_by_distance || HSTORE( - tree_distance::TEXT, listing_changes_timeline.id::TEXT - ) AS listing_change_ids_by_distance, - hi.is_current, - CASE -- context_tree_distance - WHEN ( - hi.inclusion_taxon_concept_id IS NOT NULL - AND AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] - ) OR hi.tree_distance < listing_changes_timeline.context_tree_distance - THEN hi.tree_distance - ELSE listing_changes_timeline.context_tree_distance - END AS context_tree_distance, - hi.timeline_position, - CASE -- is applicable - WHEN ( - -- there are listed populations - ARRAY_UPPER(hi.listed_geo_entities_ids, 1) IS NOT NULL - -- and the taxon has its own distribution and does not occur in any of them - AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL - AND NOT hi.listed_geo_entities_ids && taxon_concepts_mview.geo_entity_ids - ) OR ( - -- when all populations are excluded - ARRAY_UPPER(hi.excluded_geo_entities_ids, 1) IS NOT NULL - AND ARRAY_UPPER(taxon_concepts_mview.geo_entity_ids, 1) IS NOT NULL - AND hi.excluded_geo_entities_ids @> taxon_concepts_mview.geo_entity_ids - ) - THEN FALSE - WHEN ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL - -- if taxon or any of its ancestors is excluded from this listing - AND hi.excluded_taxon_concept_ids && ARRAY[ - hi.affected_taxon_concept_id, - taxon_concepts_mview.kingdom_id, - taxon_concepts_mview.phylum_id, - taxon_concepts_mview.class_id, - taxon_concepts_mview.order_id, - taxon_concepts_mview.family_id, - taxon_concepts_mview.genus_id, - taxon_concepts_mview.species_id - ] - THEN FALSE - WHEN listing_changes_timeline.context -> hi.species_listing_id::TEXT = hi.taxon_concept_id::TEXT - OR hi.taxon_concept_id = listing_changes_timeline.original_taxon_concept_id - -- this line to make Moschus leucogaster happy - OR AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] - THEN TRUE - WHEN listing_changes_timeline.context = ''::HSTORE --this would be the case when deleted - AND ( - ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL - AND NOT hi.excluded_taxon_concept_ids && ARRAY[hi.affected_taxon_concept_id] - OR ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NULL - ) - AND hi.inclusion_taxon_concept_id IS NULL - AND hi.change_type_name = 'ADDITION' - THEN TRUE -- allows for re-listing - WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance - THEN TRUE - ELSE FALSE - END AS is_applicable - FROM implied_listing_changes_view hi - JOIN listing_changes_timeline - ON hi.designation_id = listing_changes_timeline.designation_id - AND listing_changes_timeline.original_taxon_concept_id = hi.affected_taxon_concept_id - AND listing_changes_timeline.timeline_position + 1 = hi.timeline_position - JOIN change_types - ON hi.change_type_id = change_types.id - JOIN taxon_concepts_with_distributions_and_ancestors taxon_concepts_mview - ON hi.affected_taxon_concept_id = taxon_concepts_mview.id -) -SELECT * FROM listing_changes_timeline; -drop table if exists tmp_all_listing_changes_timeline_matview; -drop table if exists applicable_listing_changes_timeline_dt; -drop table if exists applicable_listing_changes_timeline_mt; +-- A timeline is identified by: +-- +-- * `taxon_concept_id` +-- * `designation_id` +-- * `interval_events_ids` +-- * `party_id` (important for e.g. Agapornis fischeri) +-- +-- A timeline can have one or more snapshots. +-- +-- * `taxon_concept_id` +-- * `listing_change_id` +-- * (`designation_id` is strictly redundant, dependent on `listing_change_id`) +-- * `interval_events_ids` +-- * `party_id` +-- * `timeline_position` -create materialized view applicable_listing_changes_timeline_mv - as select * from applicable_listing_changes_timeline_view -; -create index on applicable_listing_changes_timeline_mv ( - current_taxon_concept_id, designation_id, change_type_id, party_id, effective_at -); +DROP TABLE IF EXISTS tmp_all_listing_changes_timeline_matview; +DROP TABLE IF EXISTS applicable_listing_changes_timeline_dt; +DROP TABLE IF EXISTS applicable_listing_changes_timeline_mt; -create index on applicable_listing_changes_timeline_mv ( - current_taxon_concept_id, change_type_id, effective_at -); +CREATE MATERIALIZED VIEW applicable_listing_changes_timeline_mv AS + SELECT * FROM applicable_implied_taxon_listing_changes_view; -create index on applicable_listing_changes_timeline_mv ( - species_listing_id, current_taxon_concept_id +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_concept_id, designation_id, change_type_id, party_id, effective_at ); -create index on applicable_listing_changes_timeline_mv ( - current_taxon_concept_id, designation_id, party_id +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_concept_id, change_type_id, effective_at, original_taxon_concept_id +); +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + change_type_id, taxon_concept_id, original_taxon_concept_id ); --- explain analyse --- create table tmp_all_listing_changes_timeline_dt --- as select * from applicable_listing_changes_timeline_view; +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + species_listing_id, taxon_concept_id +); ---- 255s ---- 909344 ---- cites_listing_changes_mview + eu_listing_changes_mview + cms_listing_changes_mview are 1165459 ---- close but not quite +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_concept_id, designation_id, party_id +); -DROP VIEW IF EXISTS all_listing_changes_and_synthetics_view; --- TODO: Why is this not applied to CMS? -CREATE OR REPLACE VIEW all_listing_changes_and_synthetics_view ( - -- want to make sure this is the same set of columns as the previous view - "id", - "designation_id", - "original_taxon_concept_id", - "current_taxon_concept_id", - "context", - "inclusion_taxon_concept_id", - "party_id", - "species_listing_id", - "change_type_id", - "event_id", - "effective_at", - "is_current", - "context_tree_distance", - "timeline_position", - "is_applicable", - -- plus a few - "explicit_change", - "show_in_timeline", - "show_in_downloads", - "show_in_history" -) AS --- find inherited listing changes superceded by own listing changes --- mark them as not current in context of the child and add fake deletion records --- so that those inherited events are terminated properly on the timelines -WITH addition_change_types AS ( - SELECT * - FROM "change_types" - WHERE "name" = 'ADDITION' -), deletion_change_types AS ( - SELECT * - FROM "change_types" - WHERE "name" = 'DELETION' -), exception_change_types AS ( - SELECT * - FROM "change_types" - WHERE "name" = 'EXCEPTION' -), prev_lc AS ( - SELECT - lc.id, - lc.designation_id, - lc.original_taxon_concept_id, - lc.current_taxon_concept_id, - lc.context, - lc.inclusion_taxon_concept_id, - lc.party_id, - lc.species_listing_id, - lc.change_type_id, - lc.event_id, - next_lc.effective_at, - FALSE AS is_current, - lc.context_tree_distance, - lc.timeline_position, - lc.is_applicable, - ( - lc.species_listing_id != next_lc.species_listing_id - ) AS appendix_change - FROM addition_change_types ct - JOIN applicable_listing_changes_timeline_mv lc - ON lc.change_type_id = ct.id - JOIN applicable_listing_changes_timeline_mv next_lc - ON lc.current_taxon_concept_id = next_lc.current_taxon_concept_id - AND lc.change_type_id = next_lc.change_type_id - AND lc.effective_at < next_lc.effective_at - AND next_lc.party_id IS NOT DISTINCT FROM lc.party_id - WHERE ( - ( - -- own listing change preceded by inherited listing change - next_lc.original_taxon_concept_id = next_lc.current_taxon_concept_id - AND lc.original_taxon_concept_id != lc.current_taxon_concept_id - ) OR ( - -- own listing change preceded by own listing change if it is a not current inclusion - next_lc.original_taxon_concept_id = next_lc.current_taxon_concept_id - AND lc.original_taxon_concept_id = lc.current_taxon_concept_id - AND lc.inclusion_taxon_concept_id IS NOT NULL - AND NOT lc.is_current - ) OR ( - -- inherited listing change preceded by inherited listing change - next_lc.original_taxon_concept_id != next_lc.current_taxon_concept_id - AND lc.original_taxon_concept_id != lc.current_taxon_concept_id - ) OR ( - -- inherited listing change preceded by own listing change if it is a not current inclusion - -- in the same taxon concept as the current listing change - next_lc.original_taxon_concept_id != next_lc.current_taxon_concept_id - AND lc.original_taxon_concept_id = lc.current_taxon_concept_id - AND lc.inclusion_taxon_concept_id IS NOT NULL - AND ( - lc.inclusion_taxon_concept_id = next_lc.original_taxon_concept_id - OR NOT lc.is_current - ) - ) - ) -), fake_deletions AS ( - -- note: this generates records without an id - -- this is ok for the timelines, and those records are not used elsewhere - -- ids in this view are not unique anyway, since any id - -- from listing changes can occur multiple times - SELECT - -- TODO: test if multiple appendix changes work - DISTINCT ON ( - lc.original_taxon_concept_id, - lc.current_taxon_concept_id, - lc.designation_id, - lc.species_listing_id, - lc.party_id - ) - 0 - lc.id AS id, - lc.designation_id AS designation_id, - lc.original_taxon_concept_id AS original_taxon_concept_id, - lc.current_taxon_concept_id AS current_taxon_concept_id, - ''::hstore AS context, - NULL::INT AS inclusion_taxon_concept_id, - lc.party_id AS party_id, - lc.species_listing_id AS species_listing_id, - ct.id AS change_type_id, - lc.event_id AS event_id, - lc.effective_at AS effective_at, - TRUE AS is_current, - lc.context_tree_distance AS context_tree_distance, - lc.timeline_position AS timeline_position, - TRUE AS is_applicable, - FALSE AS explicit_change, - TRUE AS show_in_timeline, - FALSE AS show_in_downloads, - FALSE AS show_in_history - FROM prev_lc lc - JOIN deletion_change_types ct - ON ct.designation_id = lc.designation_id - WHERE appendix_change -) --- SELECT --- lc.id, --- lc.designation_id, --- lc.original_taxon_concept_id, --- lc.current_taxon_concept_id, --- lc.context, --- lc.inclusion_taxon_concept_id, --- lc.party_id, --- lc.species_listing_id, --- lc.change_type_id, --- lc.event_id, --- lc.effective_at, --- CASE --- WHEN terminated_lc.id IS NOT NULL THEN TRUE --- ELSE lc.is_current --- END AS is_current, --- lc.context_tree_distance, --- lc.timeline_position, --- lc.is_applicable, --- TRUE AS explicit_change, --- xct.id IS NULL AS show_in_timeline, --- xct.id IS NULL AS show_in_history, --- xct.id IS NULL AS show_in_downloads --- FROM applicable_listing_changes_timeline_mv lc --- -- if the row exists in prev_lc then it has been superseded --- LEFT JOIN prev_lc terminated_lc --- ON terminated_lc.id = lc.id --- AND terminated_lc.current_taxon_concept_id = lc.current_taxon_concept_id --- LEFT JOIN exception_change_types xct --- ON lc.change_type_id = xct.id --- UNION ALL -SELECT - id, - designation_id, +CREATE INDEX ON applicable_listing_changes_timeline_mv ( original_taxon_concept_id, - current_taxon_concept_id, - context, - inclusion_taxon_concept_id, - party_id, + taxon_concept_id, + designation_id, species_listing_id, - change_type_id, - event_id, - effective_at, - is_current, - context_tree_distance, - timeline_position, - is_applicable, - explicit_change, - show_in_timeline, - show_in_history, - show_in_downloads -FROM fake_deletions -; - - -drop table if exists synth_listing_changes_timeline_dt; -explain analyse -create table synth_listing_changes_timeline_dt -as select * from all_listing_changes_and_synthetics_view where designation_id = 1; + party_id +); +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_concept_id, effective_at, id +); --- CTE Scan on fake_deletions (cost=2358.58..2358.61 rows=1 width=86) (actual time=359431.642..359435.274 rows=28 loops=1) --- Filter: (designation_id = 1) --- Rows Removed by Filter: 18 --- CTE addition_change_types --- -> Seq Scan on change_types (cost=0.00..1.19 rows=1 width=636) (actual time=0.007..0.011 rows=3 loops=1) --- Filter: ((name)::text = 'ADDITION'::text) --- Rows Removed by Filter: 12 --- CTE deletion_change_types --- -> Seq Scan on change_types change_types_1 (cost=0.00..1.19 rows=1 width=636) (actual time=0.012..0.014 rows=3 loops=1) --- Filter: ((name)::text = 'DELETION'::text) --- Rows Removed by Filter: 12 --- CTE prev_lc --- -> Nested Loop (cost=0.45..2356.13 rows=1 width=83) (actual time=0.143..162253.135 rows=335815170 loops=1) --- -> Hash Join (cost=0.03..2153.73 rows=366 width=86) (actual time=0.029..150.562 rows=63307 loops=1) --- Hash Cond: (lc.change_type_id = ct.id) --- -> Seq Scan on applicable_listing_changes_timeline_mv lc (cost=0.00..1875.39 rows=73239 width=82) (actual time=0.007..31.230 rows=73239 loops=1) --- -> Hash (cost=0.02..0.02 rows=1 width=4) (actual time=0.014..0.015 rows=3 loops=1) --- Buckets: 1024 Batches: 1 Memory Usage: 9kB --- -> CTE Scan on addition_change_types ct (cost=0.00..0.02 rows=1 width=4) (actual time=0.008..0.012 rows=3 loops=1) --- -> Index Scan using applicable_listing_changes_ti_current_taxon_concept_id_chan_idx on applicable_listing_changes_timeline_mv next_lc (cost=0.42..0.54 rows=1 width=24) (actual time=0.016..1.573 rows=5305 loops=63307) --- Index Cond: ((current_taxon_concept_id = lc.current_taxon_concept_id) AND (change_type_id = lc.change_type_id) AND (lc.effective_at < effective_at)) --- Filter: ((NOT (party_id IS DISTINCT FROM lc.party_id)) AND (((original_taxon_concept_id = current_taxon_concept_id) AND (lc.original_taxon_concept_id <> lc.current_taxon_concept_id)) OR ((original_taxon_concept_id = current_taxon_concept_id) AND (lc.original_taxon_concept_id = lc.current_taxon_concept_id) AND (lc.inclusion_taxon_concept_id IS NOT NULL) AND (NOT lc.is_current)) OR ((original_taxon_concept_id <> current_taxon_concept_id) AND (lc.original_taxon_concept_id <> lc.current_taxon_concept_id)) OR ((original_taxon_concept_id <> current_taxon_concept_id) AND (lc.original_taxon_concept_id = lc.current_taxon_concept_id) AND (lc.inclusion_taxon_concept_id IS NOT NULL) AND ((lc.inclusion_taxon_concept_id = original_taxon_concept_id) OR (NOT lc.is_current))))) --- Rows Removed by Filter: 44 --- CTE fake_deletions --- -> Unique (cost=0.07..0.08 rows=1 width=86) (actual time=359431.636..359435.238 rows=46 loops=1) --- -> Sort (cost=0.07..0.07 rows=1 width=86) (actual time=359431.634..359432.354 rows=13415 loops=1) --- Sort Key: lc_1.original_taxon_concept_id, lc_1.current_taxon_concept_id, lc_1.designation_id, lc_1.species_listing_id, lc_1.party_id --- Sort Method: quicksort Memory: 2271kB --- -> Nested Loop (cost=0.00..0.06 rows=1 width=86) (actual time=11359.141..359426.368 rows=13415 loops=1) --- Join Filter: (lc_1.designation_id = ct_1.designation_id) --- Rows Removed by Join Filter: 26830 --- -> CTE Scan on prev_lc lc_1 (cost=0.00..0.02 rows=1 width=40) (actual time=11359.123..359415.743 rows=13415 loops=1) --- Filter: appendix_change --- Rows Removed by Filter: 335801755 --- -> CTE Scan on deletion_change_types ct_1 (cost=0.00..0.02 rows=1 width=8) (actual time=0.000..0.000 rows=3 loops=13415) --- Planning time: 1.459 ms --- Execution time: 360161.079 ms --- (37 rows) \ No newline at end of file +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + id, taxon_concept_id +); From 9d43693ac8fe5f99dbe10abdf50ccc04eb4aa1a7 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Fri, 20 Mar 2026 09:32:57 +0000 Subject: [PATCH 12/15] wip --- idea-refactor-cascade.sql | 261 ++++++++++++++++++++++++++++++++------ 1 file changed, 219 insertions(+), 42 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index 97535ead2..ab6513b4f 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -10,7 +10,8 @@ CREATE OR REPLACE VIEW taxon_ancestors_dv AS '{}'::BIGINT[] AS "ancestor_ids" FROM "taxon_concepts" roots WHERE "parent_id" IS NULL - AND "name_status" = 'A' + -- It turns out that some non-A names have ancestries + -- AND "name_status" = 'A' UNION ALL SELECT "child"."taxonomy_id", @@ -30,7 +31,7 @@ CREATE OR REPLACE VIEW taxon_ancestors_dv AS ), rank_depths AS ( SELECT "id" AS "rank_id", - ROW_NUMBER() OVER() AS "rank_depth" + row_number() OVER() AS "rank_depth" FROM ( SELECT ( '{' || translate(taxonomic_position, '.', ',') || '}' @@ -69,6 +70,23 @@ CREATE OR REPLACE VIEW taxon_ancestors_dv AS JOIN "rank_distances" rd ON "rd"."descendant_rank_id" = "ta"."rank_id" AND "rd"."ancestor_rank_id" = "atc"."rank_id" +UNION ALL + SELECT + "ta"."taxonomy_id", + "ta"."id", + "ta"."rank_id", + "ta"."ancestor_ids", + '{}' AS "path_ids", + "ta"."id" AS "ancestor_id", + "ta"."rank_id" AS "ancestor_rank_id", + "descendant_rank_depth" AS "ancestor_rank_depth", + "descendant_rank_depth" AS "rank_depth", + 0 AS "rank_distance" + FROM "taxon_ancestors" ta + JOIN "rank_distances" rd + ON "rd"."descendant_rank_id" = "ta"."rank_id" + AND "rd"."ancestor_rank_id" = "ta"."rank_id" + ; create materialized view taxon_ancestors_mv @@ -101,7 +119,7 @@ SELECT WHEN ct.name = 'EXCEPTION' THEN 3 WHEN ct.name = 'ADDITION' THEN 4 WHEN ct.name = 'RESERVATION' THEN 5 - END change_type_rank + END change_type_rank, CASE WHEN ct.name IN ('ADDITION', 'DELETION', 'EXCEPTION') THEN 1 ELSE 2 @@ -115,12 +133,12 @@ CREATE OR REPLACE VIEW implied_listing_changes_view AS -- (i.e. it is an ancestor's listing change). WITH designations_and_intervals AS ( SELECT - designations.id designation_id, - designations.name designation_name, - designations.taxonomy_id taxonomy_id, - intervals.start_date interval_start_date, - intervals.end_date interval_end_date, - intervals.events_ids interval_events_ids + designations.id AS designation_id, + designations.name AS designation_name, + designations.taxonomy_id AS taxonomy_id, + intervals.start_date AS interval_start_date, + intervals.end_date AS interval_end_date, + intervals.events_ids AS interval_events_ids FROM designations LEFT JOIN eu_regulations_applicability_view intervals ON designations.name = 'EU' @@ -129,7 +147,7 @@ WITH designations_and_intervals AS ( SELECT lc.id, ct.designation_id, - interval_events_ids, + designations_and_intervals.interval_events_ids, lc.taxon_concept_id, lc.species_listing_id, lc.inclusion_taxon_concept_id, @@ -137,7 +155,9 @@ WITH designations_and_intervals AS ( ct.change_type_rank AS change_type_rank, ct.name AS change_type_name, lc.event_id, - lc.effective_at::DATE, + -- A bug exists where EXCLUSIONS have `effective_at='2012-09-21 07:32:20'`, + -- instead of that of the parent. + COALESCE(included_lc.effective_at, lc.effective_at)::DATE AS effective_at, lc.is_current, ARRAY_AGG_NOTNULL(taxonomic_exclusions.taxon_concept_id) AS excluded_taxon_concept_ids FROM listing_changes lc @@ -152,21 +172,12 @@ WITH designations_and_intervals AS ( OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL OR lc.event_id = ANY(designations_and_intervals.interval_events_ids) ) - GROUP BY - lc.id, - ct.designation_id, - interval_events_ids, - lc.taxon_concept_id, - lc.species_listing_id, - lc.inclusion_taxon_concept_id, - ct.id, - ct.name, - ct.change_type_rank, - lc.event_id, - lc.effective_at::DATE, - lc.is_current + LEFT JOIN listing_changes included_lc + ON lc.parent_id = included_lc.id + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ), aggregate_lc AS ( --- the purpose of this CTE is to aggregate listed and excluded populations + -- the purpose of this CTE is to aggregate listed and excluded populations + -- All rows in this table will go into implied_listing_changes_view SELECT lc.id, lc.designation_id, @@ -241,6 +252,7 @@ WITH designations_and_intervals AS ( ) ) AS additions_by_listing_id, hstore( + -- todo: multiple listing changes per appendix is possible array_agg(ARRAY[species_listing_id, lc.id]::TEXT[]) FILTER ( WHERE lc.change_type_name = 'DELETION' ) OVER ( @@ -336,7 +348,6 @@ WITH designations_and_intervals AS ( AND ag.party_id IS NOT DISTINCT FROM dg.party_id AND ag.add_del_group_rank > dg.add_del_group_rank AND prev_ag.add_del_group_rank < dg.add_del_group_rank - WHERE NOT all_additions_are_inclusions ) AS ag ) SELECT @@ -356,7 +367,7 @@ SELECT ag.party_id AS party_id, '{}' AS listed_geo_entities_ids, '{}' AS excluded_geo_entities_ids -FROM fake_deletions_needed ag +FROM synthetic_deletions_needed ag JOIN change_types_view ct ON ct.designation_id = ag.designation_id AND ct.name = 'DELETION' @@ -365,17 +376,17 @@ SELECT * FROM aggregate_lc; DROP VIEW IF EXISTS inherited_listing_changes_view CASCADE; CREATE OR REPLACE VIEW inherited_listing_changes_view AS -SELECT - lc.id, +SELECT DISTINCT tc.id AS taxon_concept_id, - lc.taxon_concept_id AS original_taxon_concept_id, - -- Make the tree distance reflect distance from inclusion - -- TODO TEST Rhinopittecus roxellana - COALESCE(itc.rank_distance, tc.rank_distance) rank_distance, + lc.id AS listing_change_id, lc.designation_id, lc.interval_events_ids, lc.species_listing_id, lc.inclusion_taxon_concept_id, + lc.taxon_concept_id AS original_taxon_concept_id, + -- Make the tree distance reflect distance from inclusion + -- TODO TEST Rhinopittecus roxellana + COALESCE(itc.rank_distance, tc.rank_distance) rank_distance, lc.change_type_id, lc.change_type_name, lc.change_type_rank, @@ -386,17 +397,23 @@ SELECT lc.party_id, lc.listed_geo_entities_ids, lc.excluded_geo_entities_ids, - -- the following ROW_NUMBER call will assign chronological order to listing changes - -- in scope of the affected taxon concept and a particular designation - ROW_NUMBER() OVER ( + -- The following dense_rank() call will assign a unique id to each combination + -- of affected taxon concept, designation, and party. + dense_rank() OVER ( + ORDER BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id + )::BIGINT taxon_party_timeline_id, + -- The following dense_rank() call will assign chronological order to listing + -- changes in scope of the affected taxon concept and a particular + -- designation/party combination + dense_rank() OVER ( PARTITION BY lc.taxon_concept_id, lc.designation_id, lc.interval_events_ids, - -- fix Agapornis fischeri, which has - -- - -- CH R..W - -- LI R..W lc.party_id ORDER BY lc.effective_at, @@ -503,6 +520,10 @@ CREATE INDEX ON applicable_listing_changes_timeline_mv ( taxon_concept_id, designation_id, change_type_id, party_id, effective_at ); +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_party_timeline_id +); + CREATE INDEX ON applicable_listing_changes_timeline_mv ( taxon_concept_id, change_type_id, effective_at, original_taxon_concept_id ); @@ -527,9 +548,165 @@ CREATE INDEX ON applicable_listing_changes_timeline_mv ( ); CREATE INDEX ON applicable_listing_changes_timeline_mv ( - taxon_concept_id, effective_at, id + taxon_concept_id, effective_at, listing_change_id +); + +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + listing_change_id, taxon_concept_id ); CREATE INDEX ON applicable_listing_changes_timeline_mv ( - id, taxon_concept_id + taxon_party_timeline_id, change_type_name ); + +-- target #- path +-- jsonb_insert(target, path, newval) +CREATE OR REPLACE FUNCTION jsonb_object_merge( + +) RETURNS jsonb LANGUAGE SQL AS +$jsonb_object_merge$ +$jsonb_object_merge$ + +CREATE OR REPLACE FUNCTION merge_listing_state_changes( + initial_state jsonb, + state_change jsonb +) RETURNS jsonb LANGUAGE SQL AS +$merge_listing_state_changes$ + SELECT + 'ADDITION', ( + SELECT jsonb_object_agg( + FROM jsonb_each(initial_state->'ADDITION') + ) + 'RESERVATION', initial_state->'RESERVATION' + +$merge_listing_state_changes$ + +WITH listing_change_groups AS ( + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') + ) + lc.taxon_party_timeline_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.taxon_concept_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') AS is_adx, + -- Build jsonbs of the form: + -- [rank_distance][species_listing_id][listing_change_id]: TRUE + array_agg( + ARRAY[lc.change_type_name, lc.rank_distance, lc.species_listing_id, lc.listing_change_id]::TEXT[] + ) OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') + ) listing_changes, + dense_rank() OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') + ORDER BY + lc.effective_at + )::INT AS change_group_rank + FROM applicable_listing_changes_timeline_mv lc + ORDER BY + lc.taxon_party_timeline_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') +), listing_change_groups_with_keyed_info as ( + SELECT *, ( + CASE + WHEN listing_changes IS NULL OR array_length(listing_changes, 1) < 1 + THEN NULL + ELSE ( + SELECT jsonb_object_agg( + d.change_type_name, + d.by_rank_distance + ) by_change_type_name + FROM ( + SELECT + d.change_type_name, + jsonb_object_agg( + d.rank_distance::text, + d.by_species_listing_id + ) OVER ( + PARTITION BY d.change_type_name + ) by_rank_distance + FROM ( + SELECT + d.change_type_name, + d.rank_distance, + jsonb_object_agg( + d.species_listing_id::text, by_listing_change_id + ) OVER ( + PARTITION BY + d.change_type_name, + d.rank_distance + ) AS by_species_listing_id + FROM ( + SELECT + d.change_type_name, + d.rank_distance, + d.species_listing_id, + jsonb_object_agg( + d.listing_change_id::text, true + ) OVER ( + PARTITION BY + d.change_type_name, + d.rank_distance, + d.species_listing_id + ) AS by_listing_change_id + FROM ( + SELECT + unnest((listing_changes::TEXT[][])[1:][1:1]), + unnest((listing_changes::TEXT[][])[1:][2:2]), + unnest((listing_changes::TEXT[][])[1:][3:3]), + unnest((listing_changes::TEXT[][])[1:][4:4]) + ) d (change_type_name, rank_distance, species_listing_id, listing_change_id) + ) d + ) d + ) d + ) + END +) listing_state_changes + FROM listing_change_groups +) RECURSIVE stateful_listing_change_groups AS ( + SELECT + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.is_adx, + lcg.listing_state_changes, + lcg.listing_state_changes AS listing_state + FROM listing_change_groups_with_keyed_info lcg + WHERE lcg.change_group_rank = 1 + UNION + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.is_adx, + lcg.listing_state_changes, + merge_listing_state_changes( + lcg.listing_state, + lcg.listing_state_changes + ) AS listing_state + FROM listing_change_groups_with_keyed_info lcg + JOIN stateful_listing_change_groups prev_lcg + ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id + AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 +) +select * from foo where taxon_concept_id = 6353 and designation_id = 1; + +select * from applicable_listing_changes_timeline_mv lc where taxon_concept_id = 6353 and designation_id = 1; From 54147a9c245aee58118e5f2da1e250486bd47ca2 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Mon, 23 Mar 2026 16:07:46 +0000 Subject: [PATCH 13/15] stateful_listing_change_groups_dv --- idea-refactor-cascade.sql | 335 +++++++++++++++++++++++--------------- 1 file changed, 201 insertions(+), 134 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index ab6513b4f..753535f2e 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -562,151 +562,218 @@ CREATE INDEX ON applicable_listing_changes_timeline_mv ( -- target #- path -- jsonb_insert(target, path, newval) CREATE OR REPLACE FUNCTION jsonb_object_merge( - -) RETURNS jsonb LANGUAGE SQL AS + old_object JSONB, + new_object JSONB +) RETURNS JSONB LANGUAGE SQL AS $jsonb_object_merge$ + SELECT jsonb_object_agg( + COALESCE(n.key, o.key), + CASE + WHEN jsonb_typeof(o.value) = 'object' AND jsonb_typeof(n.value) = 'object' + THEN jsonb_object_merge(o.value, n.value) + ELSE COALESCE(o.value, n.value) + END + ) + FROM jsonb_each(old_object) o + FULL OUTER JOIN jsonb_each(new_object) n ON o.key = n.key +$jsonb_object_merge$; + +CREATE OR REPLACE FUNCTION jsonb_object_omit( + original_object JSONB, + to_omit TEXT[] +) RETURNS JSONB LANGUAGE SQL AS $jsonb_object_merge$ + SELECT jsonb_object_agg( + o.key, + o.value + ) FILTER ( + WHERE o.key != ANY(to_omit) + ) + FROM jsonb_each(original_object) o +$jsonb_object_merge$; CREATE OR REPLACE FUNCTION merge_listing_state_changes( - initial_state jsonb, - state_change jsonb -) RETURNS jsonb LANGUAGE SQL AS + initial_state hstore[], + state_change hstore[] +) RETURNS hstore[] LANGUAGE SQL AS $merge_listing_state_changes$ - SELECT - 'ADDITION', ( - SELECT jsonb_object_agg( - FROM jsonb_each(initial_state->'ADDITION') + SELECT array_agg(final_state.listing_state_change) + FROM ( + WITH listing_state AS ( + SELECT + UNNEST(initial_state)->'change_type_name' AS change_type_name, + UNNEST(initial_state)->'rank_distance' AS rank_distance, + UNNEST(initial_state)->'species_listing_id' AS species_listing_id, + UNNEST(initial_state)->'listing_change_id' AS listing_change_id + ), listing_changes AS ( + SELECT + UNNEST(state_change)->'change_type_name' AS change_type_name, + UNNEST(state_change)->'rank_distance' AS rank_distance, + UNNEST(state_change)->'species_listing_id' AS species_listing_id, + UNNEST(state_change)->'listing_change_id' AS listing_change_id + ), listing_state_distance AS ( + SELECT MIN(rank_distance) AS rank_distance + FROM listing_state + WHERE change_type_name NOT IN ( + 'DELETION', 'RESERVATION_WITHDRAWAL' + ) + ), listing_changes_distance AS ( + SELECT MIN(rank_distance) AS rank_distance + FROM listing_changes + WHERE change_type_name NOT IN ( + 'DELETION', 'RESERVATION_WITHDRAWAL' + ) ) - 'RESERVATION', initial_state->'RESERVATION' + SELECT hstore(o.*) AS listing_state_change + FROM listing_state o + LEFT JOIN listing_changes d + ON ( + (o.change_type_name NOT IN ('DELETION', 'RESERVATION_WITHDRAWAL', 'UNSUPPRESSION') AND d.change_type_name = 'DELETION') + OR + (o.change_type_name = 'RESERVATION' AND d.change_type_name = 'RESERVATION_WITHDRAWAL') + ) + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + WHERE d.listing_change_id IS NULL + UNION + -- SUPPRESSIONS + SELECT hstore(suppressions.*) AS listing_state_change + FROM ( + SELECT + DISTINCT ON ( + rank_distance, + species_listing_id + ) + 'SUPPRESSION' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE EXISTS ( + SELECT TRUE + FROM listing_state_distance lsd + WHERE lsd.rank_distance < o.rank_distance + ) + ) suppressions + UNION + -- UNSUPPRESSIONS + SELECT hstore(unsuppressions.*) AS listing_state_change + FROM ( + SELECT + DISTINCT ON ( + rank_distance, + species_listing_id + ) + 'UNSUPPRESSION' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE EXISTS ( + SELECT TRUE + FROM listing_state_distance lsd + WHERE lsd.rank_distance < o.rank_distance + ) AND EXISTS ( + SELECT TRUE + FROM listing_changes_distance lcd + WHERE lcd.rank_distance = o.rank_distance + ) + ) unsuppressions + UNION + SELECT hstore(listing_changes.*) AS listing_state_change + FROM listing_changes + ) final_state; +$merge_listing_state_changes$; -$merge_listing_state_changes$ -WITH listing_change_groups AS ( - SELECT - DISTINCT ON ( - lc.taxon_party_timeline_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ) - lc.taxon_party_timeline_id, - lc.designation_id, - lc.interval_events_ids, - lc.party_id, - lc.taxon_concept_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') AS is_adx, - -- Build jsonbs of the form: - -- [rank_distance][species_listing_id][listing_change_id]: TRUE - array_agg( - ARRAY[lc.change_type_name, lc.rank_distance, lc.species_listing_id, lc.listing_change_id]::TEXT[] - ) OVER ( - PARTITION BY + +DROP VIEW IF EXISTS stateful_listing_change_groups_dv CASCADE; +CREATE OR REPLACE VIEW stateful_listing_change_groups_dv AS + WITH RECURSIVE stateful_listing_change_groups AS ( + WITH listing_change_groups AS ( + SELECT + DISTINCT ON ( lc.taxon_party_timeline_id, lc.effective_at, lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ) listing_changes, - dense_rank() OVER ( - PARTITION BY - lc.taxon_party_timeline_id, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ORDER BY - lc.effective_at - )::INT AS change_group_rank - FROM applicable_listing_changes_timeline_mv lc - ORDER BY - lc.taxon_party_timeline_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') -), listing_change_groups_with_keyed_info as ( - SELECT *, ( - CASE - WHEN listing_changes IS NULL OR array_length(listing_changes, 1) < 1 - THEN NULL - ELSE ( - SELECT jsonb_object_agg( - d.change_type_name, - d.by_rank_distance - ) by_change_type_name - FROM ( - SELECT - d.change_type_name, - jsonb_object_agg( - d.rank_distance::text, - d.by_species_listing_id - ) OVER ( - PARTITION BY d.change_type_name - ) by_rank_distance - FROM ( - SELECT - d.change_type_name, - d.rank_distance, - jsonb_object_agg( - d.species_listing_id::text, by_listing_change_id - ) OVER ( - PARTITION BY - d.change_type_name, - d.rank_distance - ) AS by_species_listing_id - FROM ( - SELECT - d.change_type_name, - d.rank_distance, - d.species_listing_id, - jsonb_object_agg( - d.listing_change_id::text, true - ) OVER ( - PARTITION BY - d.change_type_name, - d.rank_distance, - d.species_listing_id - ) AS by_listing_change_id - FROM ( - SELECT - unnest((listing_changes::TEXT[][])[1:][1:1]), - unnest((listing_changes::TEXT[][])[1:][2:2]), - unnest((listing_changes::TEXT[][])[1:][3:3]), - unnest((listing_changes::TEXT[][])[1:][4:4]) - ) d (change_type_name, rank_distance, species_listing_id, listing_change_id) - ) d - ) d - ) d + ) + lc.taxon_party_timeline_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.taxon_concept_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') AS is_adx, + array_agg( + hstore(ARRAY[ + ['change_type_name', lc.change_type_name], + ['rank_distance', lc.rank_distance], + ['species_listing_id', lc.species_listing_id], + ['listing_change_id', lc.listing_change_id] + ]::TEXT[][]) + ) OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') + ) listing_changes, + dense_rank() OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') + ORDER BY + lc.effective_at + )::INT AS change_group_rank + FROM applicable_listing_changes_timeline_mv lc + ORDER BY + lc.taxon_party_timeline_id, + lc.effective_at, + lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') ) - END -) listing_state_changes - FROM listing_change_groups -) RECURSIVE stateful_listing_change_groups AS ( SELECT - SELECT - lcg.taxon_party_timeline_id, - lcg.designation_id, - lcg.interval_events_ids, - lcg.party_id, - lcg.taxon_concept_id, - lcg.effective_at, - lcg.is_adx, - lcg.listing_state_changes, - lcg.listing_state_changes AS listing_state - FROM listing_change_groups_with_keyed_info lcg - WHERE lcg.change_group_rank = 1 - UNION - SELECT - lcg.taxon_party_timeline_id, - lcg.designation_id, - lcg.interval_events_ids, - lcg.party_id, - lcg.taxon_concept_id, - lcg.effective_at, - lcg.is_adx, - lcg.listing_state_changes, - merge_listing_state_changes( - lcg.listing_state, - lcg.listing_state_changes - ) AS listing_state - FROM listing_change_groups_with_keyed_info lcg - JOIN stateful_listing_change_groups prev_lcg - ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id - AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 -) -select * from foo where taxon_concept_id = 6353 and designation_id = 1; + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.is_adx, + lcg.change_group_rank, + lcg.listing_changes, + lcg.listing_changes AS listing_state + FROM listing_change_groups lcg + WHERE lcg.change_group_rank = 1 +UNION + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.is_adx, + lcg.change_group_rank, + lcg.listing_changes, + merge_listing_state_changes( + prev_lcg.listing_state, + lcg.listing_changes + ) AS listing_state + FROM listing_change_groups lcg + JOIN stateful_listing_change_groups prev_lcg + ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id + AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 + AND lcg.is_adx = prev_lcg.is_adx +) SELECT * FROM stateful_listing_change_groups +; + +create materialized view stateful_listing_change_groups_mv + as select * from stateful_listing_change_groups_dv +; + +CREATE INDEX ON stateful_listing_change_groups_mv ( + taxon_concept_id, designation_id, party_id, effective_at +); + +select * from stateful_listing_change_groups_mv where designation_id = 1 and taxon_concept_id = 12206 -select * from applicable_listing_changes_timeline_mv lc where taxon_concept_id = 6353 and designation_id = 1; +-- select * from applicable_listing_changes_timeline_mv lc where taxon_concept_id = 6353 and designation_id = 1; From 443001fb8a34ae20ec1f3b0874c7ddf0ca5e3b05 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Tue, 24 Mar 2026 08:19:27 +0000 Subject: [PATCH 14/15] fixes for complete_listing_changes_dv --- idea-refactor-cascade.sql | 552 +++++++++++++++++++++++++++----------- 1 file changed, 398 insertions(+), 154 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index 753535f2e..2755d5938 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -35,7 +35,7 @@ CREATE OR REPLACE VIEW taxon_ancestors_dv AS FROM ( SELECT ( '{' || translate(taxonomic_position, '.', ',') || '}' - )::INT[], + )::INTEGER[], * FROM ranks ORDER BY 1 @@ -89,23 +89,23 @@ UNION ALL ; -create materialized view taxon_ancestors_mv - as select * from taxon_ancestors_dv +CREATE MATERIALIZED VIEW taxon_ancestors_mv + AS SELECT * FROM taxon_ancestors_dv ; -create index on taxon_ancestors_mv ( +CREATE INDEX ON taxon_ancestors_mv ( id, ancestor_id ); -create index on taxon_ancestors_mv ( +CREATE INDEX ON taxon_ancestors_mv ( id, rank_distance ); -create index on taxon_ancestors_mv ( +CREATE INDEX ON taxon_ancestors_mv ( id, ancestor_rank_depth ); -create index on taxon_ancestors_mv ( +CREATE INDEX ON taxon_ancestors_mv ( id, ancestor_rank_id ); @@ -276,7 +276,7 @@ WITH designations_and_intervals AS ( lc.change_type_name ORDER BY lc.effective_at::DATE - )::INT + )::INTEGER END AS addition_group_rank, CASE WHEN lc.change_type_name IN ('ADDITION', 'DELETION') THEN dense_rank() OVER ( @@ -287,7 +287,7 @@ WITH designations_and_intervals AS ( lc.party_id ORDER BY lc.effective_at::DATE - )::INT + )::INTEGER END AS add_del_group_rank FROM aggregate_lc lc WHERE lc.change_type_name IN ('ADDITION', 'DELETION') @@ -351,7 +351,7 @@ WITH designations_and_intervals AS ( ) AS ag ) SELECT - 0 - ag.deleted_listing_change_id AS id, + ag.deleted_listing_change_id AS id, ag.designation_id, ag.interval_events_ids, ag.taxon_concept_id, @@ -384,9 +384,8 @@ SELECT DISTINCT lc.species_listing_id, lc.inclusion_taxon_concept_id, lc.taxon_concept_id AS original_taxon_concept_id, - -- Make the tree distance reflect distance from inclusion - -- TODO TEST Rhinopittecus roxellana - COALESCE(itc.rank_distance, tc.rank_distance) rank_distance, + lc.id AS original_listing_change_id, + tc.rank_distance, lc.change_type_id, lc.change_type_name, lc.change_type_rank, @@ -419,13 +418,10 @@ SELECT DISTINCT lc.effective_at, lc.change_type_rank, tc.rank_distance DESC - )::INT AS timeline_position + )::INTEGER AS timeline_position FROM implied_listing_changes_view lc JOIN taxon_ancestors_mv tc ON lc.taxon_concept_id = tc.ancestor_id -LEFT JOIN taxon_ancestors_mv itc - ON lc.inclusion_taxon_concept_id = itc.ancestor_id - AND lc.taxon_concept_id = itc.id ; DROP VIEW IF EXISTS taxon_concepts_with_distributions_and_ancestors CASCADE; @@ -489,7 +485,52 @@ SELECT ) AS is_taxonomically_excluded FROM inherited_listing_changes_view lc JOIN taxon_concepts_with_distributions_and_ancestors tc - ON lc.taxon_concept_id = tc.id; + ON lc.taxon_concept_id = tc.id +UNION ALL +SELECT + -- At all descendants of an inclusion, add the listings at the higher level + -- which are current at the time of the listing, with `FALSE` values for both + -- `is_geographically_excluded` and `is_taxonomically_excluded`. + includer.taxon_concept_id, + includer.listing_change_id, + includer.designation_id, + includer.interval_events_ids, + includer.species_listing_id, + includer.inclusion_taxon_concept_id, + includer.taxon_concept_id AS original_taxon_concept_id, + included.id AS original_listing_change_id, + -- TODO TEST Rhinopittecus roxellana + includer.rank_distance, + includer.change_type_id, + includer.change_type_name, + includer.change_type_rank, + includer.event_id, + includer.effective_at, + includer.is_current, + includer.excluded_taxon_concept_ids, + includer.party_id, + included.listed_geo_entities_ids, + included.excluded_geo_entities_ids, + includer.taxon_party_timeline_id, + includer.timeline_position, + FALSE AS is_geographically_excluded, + FALSE is_taxonomically_excluded +FROM inherited_listing_changes_view includer +JOIN implied_listing_changes_view included + ON includer.inclusion_taxon_concept_id = included.taxon_concept_id + AND included.change_type_name = 'ADDITION' + AND included.effective_at <= includer.effective_at + AND NOT EXISTS ( + SELECT TRUE + FROM implied_listing_changes_view deletion + WHERE included.taxon_concept_id = deletion.taxon_concept_id + AND included.species_listing_id = deletion.species_listing_id + AND included.effective_at < deletion.effective_at + AND includer.effective_at >= deletion.effective_at + ) +JOIN taxon_concepts_with_distributions_and_ancestors tc + ON includer.taxon_concept_id = tc.id +; -- A timeline is identified by: @@ -559,6 +600,10 @@ CREATE INDEX ON applicable_listing_changes_timeline_mv ( taxon_party_timeline_id, change_type_name ); +CREATE INDEX ON applicable_listing_changes_timeline_mv ( + taxon_party_timeline_id, effective_at +); + -- target #- path -- jsonb_insert(target, path, newval) CREATE OR REPLACE FUNCTION jsonb_object_merge( @@ -597,7 +642,7 @@ CREATE OR REPLACE FUNCTION merge_listing_state_changes( state_change hstore[] ) RETURNS hstore[] LANGUAGE SQL AS $merge_listing_state_changes$ - SELECT array_agg(final_state.listing_state_change) + SELECT array_agg(DISTINCT final_state.listing_state_change) FROM ( WITH listing_state AS ( SELECT @@ -611,169 +656,368 @@ $merge_listing_state_changes$ UNNEST(state_change)->'rank_distance' AS rank_distance, UNNEST(state_change)->'species_listing_id' AS species_listing_id, UNNEST(state_change)->'listing_change_id' AS listing_change_id - ), listing_state_distance AS ( - SELECT MIN(rank_distance) AS rank_distance - FROM listing_state - WHERE change_type_name NOT IN ( - 'DELETION', 'RESERVATION_WITHDRAWAL' + ), continuation_records AS ( + -- CONTINUATION + SELECT hstore(o.*) AS listing_state_change + FROM listing_state o + WHERE NOT EXISTS ( + SELECT TRUE FROM listing_changes d + WHERE ( + (o.change_type_name NOT IN ('DELETION', 'RESERVATION_WITHDRAWAL', 'UNSUPPRESSION') AND d.change_type_name = 'DELETION') + OR + (o.change_type_name = 'RESERVATION' AND d.change_type_name = 'RESERVATION_WITHDRAWAL') ) - ), listing_changes_distance AS ( - SELECT MIN(rank_distance) AS rank_distance - FROM listing_changes - WHERE change_type_name NOT IN ( - 'DELETION', 'RESERVATION_WITHDRAWAL' + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + ) + ), deletion_records AS ( + -- DELETION + SELECT hstore(deletions.*) AS listing_state_change + FROM ( + SELECT + 'DELETION' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE o.change_type_name = 'ADDITION' + AND NOT EXISTS ( + SELECT TRUE + FROM listing_changes d + WHERE o.change_type_name = d.change_type_name + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id ) + ) deletions + ), reservation_withdrawal_records AS ( + -- RESERVATION_WITHDRAWAL + SELECT hstore(reservation_withdrawals.*) AS listing_state_change + FROM ( + SELECT + 'RESERVATION_WITHDRAWAL' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE o.change_type_name = 'RESERVATION' + AND NOT EXISTS ( + SELECT TRUE + FROM listing_changes d + WHERE o.change_type_name = d.change_type_name + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + ) + ) reservation_withdrawals + ), addition_and_reservation_records AS ( + -- ADDITION, RESERVATION + SELECT hstore(d.*) AS listing_state_change + FROM listing_changes d + WHERE d.change_type_name IN ('ADDITION', 'RESERVATION') + ), listing_state_distance AS ( + SELECT + MIN(listing_state.rank_distance) AS rank_distance + FROM listing_state + WHERE change_type_name IN ('ADDITION', 'RESERVATION') + ), listing_changes_distance AS ( + SELECT + MIN((rd.listing_state_change->'rank_distance')::INTEGER) AS rank_distance + FROM ( + SELECT listing_state_change + FROM addition_and_reservation_records + UNION + SELECT listing_state_change + FROM continuation_records + ) rd ) - SELECT hstore(o.*) AS listing_state_change - FROM listing_state o - LEFT JOIN listing_changes d - ON ( - (o.change_type_name NOT IN ('DELETION', 'RESERVATION_WITHDRAWAL', 'UNSUPPRESSION') AND d.change_type_name = 'DELETION') - OR - (o.change_type_name = 'RESERVATION' AND d.change_type_name = 'RESERVATION_WITHDRAWAL') - ) - AND o.rank_distance = d.rank_distance - AND o.species_listing_id = d.species_listing_id - WHERE d.listing_change_id IS NULL + SELECT * FROM deletion_records + UNION + SELECT * FROM reservation_withdrawal_records + UNION + SELECT * FROM continuation_records + UNION + SELECT * FROM addition_and_reservation_records UNION -- SUPPRESSIONS - SELECT hstore(suppressions.*) AS listing_state_change + SELECT + r.listing_state_change || hstore( + ARRAY[['change_type_name', 'SUPPRESSION']] + ) AS listing_state_change FROM ( - SELECT - DISTINCT ON ( - rank_distance, - species_listing_id - ) - 'SUPPRESSION' AS change_type_name, - o.rank_distance AS rank_distance, - o.species_listing_id AS species_listing_id, - o.listing_change_id AS listing_change_id - FROM listing_state o - WHERE EXISTS ( - SELECT TRUE - FROM listing_state_distance lsd - WHERE lsd.rank_distance < o.rank_distance - ) - ) suppressions + SELECT * FROM continuation_records + UNION + SELECT * FROM addition_and_reservation_records + ) r + WHERE EXISTS ( + SELECT TRUE + FROM listing_state_distance lsd + WHERE lsd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER + ) UNION -- UNSUPPRESSIONS - SELECT hstore(unsuppressions.*) AS listing_state_change - FROM ( - SELECT - DISTINCT ON ( - rank_distance, - species_listing_id - ) - 'UNSUPPRESSION' AS change_type_name, - o.rank_distance AS rank_distance, - o.species_listing_id AS species_listing_id, - o.listing_change_id AS listing_change_id - FROM listing_state o - WHERE EXISTS ( - SELECT TRUE - FROM listing_state_distance lsd - WHERE lsd.rank_distance < o.rank_distance - ) AND EXISTS ( - SELECT TRUE - FROM listing_changes_distance lcd - WHERE lcd.rank_distance = o.rank_distance - ) - ) unsuppressions - UNION - SELECT hstore(listing_changes.*) AS listing_state_change - FROM listing_changes + SELECT + r.listing_state_change || hstore( + ARRAY[['change_type_name', 'UNSUPPRESSION']] + ) AS listing_state_change + FROM continuation_records r + WHERE EXISTS ( + SELECT TRUE + FROM listing_state_distance lsd + WHERE lsd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER + ) AND EXISTS ( + SELECT TRUE + FROM listing_changes_distance lcd + WHERE lcd.rank_distance::INTEGER = (r.listing_state_change->'rank_distance')::INTEGER + ) ) final_state; $merge_listing_state_changes$; - DROP VIEW IF EXISTS stateful_listing_change_groups_dv CASCADE; CREATE OR REPLACE VIEW stateful_listing_change_groups_dv AS WITH RECURSIVE stateful_listing_change_groups AS ( WITH listing_change_groups AS ( - SELECT - DISTINCT ON ( + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.effective_at + ) lc.taxon_party_timeline_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.taxon_concept_id, lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ) + array_agg( + hstore(ARRAY[ + ['change_type_name', lc.change_type_name], + ['rank_distance', lc.rank_distance], + ['species_listing_id', lc.species_listing_id], + ['listing_change_id', lc.listing_change_id] + ]::TEXT[][]) + ) OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.effective_at + ) listing_changes, + dense_rank() OVER ( + PARTITION BY + lc.taxon_party_timeline_id + ORDER BY + lc.effective_at + )::INTEGER AS change_group_rank, + count(*) OVER ( + PARTITION BY + lc.taxon_party_timeline_id + ORDER BY + lc.effective_at + )::INTEGER AS change_group_max_rank + FROM applicable_listing_changes_timeline_mv lc + ORDER BY + lc.taxon_party_timeline_id, + lc.effective_at + ) + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.change_group_rank, + lcg.listing_changes, + lcg.listing_changes AS listing_state + FROM listing_change_groups lcg + WHERE lcg.change_group_rank = 1 + UNION + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.change_group_rank, + lcg.listing_changes, + merge_listing_state_changes( + prev_lcg.listing_state, + lcg.listing_changes + ) AS listing_state + FROM listing_change_groups lcg + JOIN stateful_listing_change_groups prev_lcg + ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id + AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 + ) SELECT * FROM stateful_listing_change_groups +; + +CREATE MATERIALIZED VIEW stateful_listing_change_groups_mv + AS SELECT * FROM stateful_listing_change_groups_dv +; + +CREATE INDEX ON stateful_listing_change_groups_mv ( + taxon_concept_id, designation_id, party_id, effective_at +); + +CREATE INDEX ON stateful_listing_change_groups_mv ( + taxon_party_timeline_id, effective_at +); + +DROP VIEW IF EXISTS complete_listing_changes_dv CASCADE; +CREATE OR REPLACE VIEW complete_listing_changes_dv AS +WITH to_list AS ( + -- ADDITION, RESERVATION + SELECT + DISTINCT ON ( lc.taxon_party_timeline_id, - lc.designation_id, - lc.interval_events_ids, - lc.party_id, - lc.taxon_concept_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') AS is_adx, - array_agg( - hstore(ARRAY[ - ['change_type_name', lc.change_type_name], - ['rank_distance', lc.rank_distance], - ['species_listing_id', lc.species_listing_id], - ['listing_change_id', lc.listing_change_id] - ]::TEXT[][]) - ) OVER ( - PARTITION BY - lc.taxon_party_timeline_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ) listing_changes, - dense_rank() OVER ( - PARTITION BY - lc.taxon_party_timeline_id, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ORDER BY - lc.effective_at - )::INT AS change_group_rank - FROM applicable_listing_changes_timeline_mv lc - ORDER BY + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + lc.change_type_name + FROM applicable_listing_changes_timeline_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at = lc.effective_at + AND lc.change_type_name IN ('ADDITION', 'RESERVATION') + UNION ALL + -- DELETION + SELECT + DISTINCT ON ( lc.taxon_party_timeline_id, - lc.effective_at, - lc.change_type_name IN ('ADDITION', 'DELETION', 'EXCLUSION') - ) + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'DELETION' AS change_type_name + FROM applicable_listing_changes_timeline_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at > lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'species_listing_id' = species_listing_id::TEXT + AND group_listing_state->'change_type_name' = 'DELETION' + ) + UNION ALL + -- SUPPRESSION SELECT - lcg.taxon_party_timeline_id, - lcg.designation_id, - lcg.interval_events_ids, - lcg.party_id, - lcg.taxon_concept_id, - lcg.effective_at, - lcg.is_adx, - lcg.change_group_rank, - lcg.listing_changes, - lcg.listing_changes AS listing_state - FROM listing_change_groups lcg - WHERE lcg.change_group_rank = 1 -UNION + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'SUPPRESSION' AS change_type_name + FROM applicable_listing_changes_timeline_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at >= lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'listing_change_id' = lc.listing_change_id::TEXT + AND group_listing_state->'change_type_name' = 'SUPPRESSION' + ) + UNION ALL SELECT - lcg.taxon_party_timeline_id, - lcg.designation_id, - lcg.interval_events_ids, - lcg.party_id, - lcg.taxon_concept_id, - lcg.effective_at, - lcg.is_adx, - lcg.change_group_rank, - lcg.listing_changes, - merge_listing_state_changes( - prev_lcg.listing_state, - lcg.listing_changes - ) AS listing_state - FROM listing_change_groups lcg - JOIN stateful_listing_change_groups prev_lcg - ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id - AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 - AND lcg.is_adx = prev_lcg.is_adx -) SELECT * FROM stateful_listing_change_groups + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'UNSUPPRESSION' AS change_type_name + FROM applicable_listing_changes_timeline_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at > lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'listing_change_id' = lc.listing_change_id::TEXT + AND group_listing_state->'change_type_name' = 'UNSUPPRESSION' + ) + WHERE lc.change_type_name IN ('ADDITION', 'RESERVATION') +) +SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id, + lc.species_listing_id, + lx.effective_at, + lx.change_type_name + ) + lc.taxon_concept_id, + lc.listing_change_id, + lc.designation_id, + lc.interval_events_ids, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.original_taxon_concept_id, + lc.original_listing_change_id, + lc.rank_distance, + ct.id AS change_type_id, + ct.name AS change_type_name, + lc.event_id, + lx.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + lc.party_id, + lc.listed_geo_entities_ids, + lc.excluded_geo_entities_ids, + lc.taxon_party_timeline_id, + lc.timeline_position, + lc.is_geographically_excluded, + lc.is_taxonomically_excluded +FROM + applicable_listing_changes_timeline_mv lc +JOIN to_list lx + ON lx.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lx.listing_change_id = lc.listing_change_id +JOIN change_types_view ct + ON ct.designation_id = lc.designation_id + AND ct.name = lx.change_type_name +ORDER BY + lc.taxon_party_timeline_id, + lc.listing_change_id, + lx.effective_at ; -create materialized view stateful_listing_change_groups_mv - as select * from stateful_listing_change_groups_dv +CREATE MATERIALIZED VIEW complete_listing_changes_mv + AS SELECT * FROM complete_listing_changes_dv ; -CREATE INDEX ON stateful_listing_change_groups_mv ( +CREATE INDEX ON complete_listing_changes_mv ( taxon_concept_id, designation_id, party_id, effective_at ); -select * from stateful_listing_change_groups_mv where designation_id = 1 and taxon_concept_id = 12206 +CREATE INDEX ON complete_listing_changes_mv ( + taxon_party_timeline_id, effective_at +); + +SELECT * FROM complete_listing_changes_mv WHERE designation_id = 1 AND taxon_concept_id = ( + SELECT taxon_concept_id FROM complete_listing_changes_mv WHERE designation_id = 1 AND change_type_name = 'UNSUPPRESSION' +); +; -- select * from applicable_listing_changes_timeline_mv lc where taxon_concept_id = 6353 and designation_id = 1; From a6253067870803058e6148301b3e970b2b903927 Mon Sep 17 00:00:00 2001 From: Daniel Perrett Date: Tue, 24 Mar 2026 12:16:12 +0000 Subject: [PATCH 15/15] more fixes --- idea-refactor-cascade.sql | 126 +++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql index 2755d5938..1b1a5f0b8 100644 --- a/idea-refactor-cascade.sql +++ b/idea-refactor-cascade.sql @@ -400,7 +400,7 @@ SELECT DISTINCT -- of affected taxon concept, designation, and party. dense_rank() OVER ( ORDER BY - lc.taxon_concept_id, + tc.id, lc.designation_id, lc.interval_events_ids, lc.party_id @@ -453,7 +453,7 @@ JOIN ( JOIN ranks r ON ta.ancestor_rank_id = r.id GROUP BY ta.id ) ta ON tc.id = ta.id -JOIN ( +LEFT JOIN ( SELECT taxon_concept_id "id", array_agg(geo_entity_id) AS geo_entity_ids @@ -462,8 +462,9 @@ JOIN ( ) td ON tc.id = td.id; -DROP VIEW IF EXISTS applicable_implied_taxon_listing_changes_view CASCADE; -CREATE OR REPLACE VIEW applicable_implied_taxon_listing_changes_view AS +-- this one is quite slow on staging - 2m for SELECT 2688636 +DROP VIEW IF EXISTS applicable_inherited_taxon_listing_changes_dv CASCADE; +CREATE OR REPLACE VIEW applicable_inherited_taxon_listing_changes_dv AS SELECT lc.*, ( @@ -497,8 +498,8 @@ SELECT includer.interval_events_ids, includer.species_listing_id, includer.inclusion_taxon_concept_id, - includer.taxon_concept_id AS original_taxon_concept_id, - included.id AS original_listing_change_id, + included.taxon_concept_id AS original_taxon_concept_id, + included.id AS original_listing_change_id, -- TODO TEST Rhinopittecus roxellana includer.rank_distance, includer.change_type_id, @@ -506,8 +507,8 @@ SELECT includer.change_type_rank, includer.event_id, includer.effective_at, - includer.is_current, - includer.excluded_taxon_concept_ids, + included.is_current, + included.excluded_taxon_concept_ids, includer.party_id, included.listed_geo_entities_ids, included.excluded_geo_entities_ids, @@ -518,6 +519,7 @@ SELECT FROM inherited_listing_changes_view includer JOIN implied_listing_changes_view included ON includer.inclusion_taxon_concept_id = included.taxon_concept_id + AND included.species_listing_id = includer.species_listing_id AND included.change_type_name = 'ADDITION' AND included.effective_at <= includer.effective_at AND NOT EXISTS ( @@ -554,33 +556,33 @@ DROP TABLE IF EXISTS tmp_all_listing_changes_timeline_matview; DROP TABLE IF EXISTS applicable_listing_changes_timeline_dt; DROP TABLE IF EXISTS applicable_listing_changes_timeline_mt; -CREATE MATERIALIZED VIEW applicable_listing_changes_timeline_mv AS - SELECT * FROM applicable_implied_taxon_listing_changes_view; +CREATE MATERIALIZED VIEW applicable_inherited_taxon_listing_changes_mv AS + SELECT * FROM applicable_inherited_taxon_listing_changes_dv; -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_concept_id, designation_id, change_type_id, party_id, effective_at ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_party_timeline_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_concept_id, change_type_id, effective_at, original_taxon_concept_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( change_type_id, taxon_concept_id, original_taxon_concept_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( species_listing_id, taxon_concept_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_concept_id, designation_id, party_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( original_taxon_concept_id, taxon_concept_id, designation_id, @@ -588,19 +590,19 @@ CREATE INDEX ON applicable_listing_changes_timeline_mv ( party_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_concept_id, effective_at, listing_change_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( listing_change_id, taxon_concept_id ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_party_timeline_id, change_type_name ); -CREATE INDEX ON applicable_listing_changes_timeline_mv ( +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( taxon_party_timeline_id, effective_at ); @@ -729,31 +731,31 @@ $merge_listing_state_changes$ FROM continuation_records ) rd ) - SELECT * FROM deletion_records + SELECT listing_state_change FROM deletion_records UNION - SELECT * FROM reservation_withdrawal_records + SELECT listing_state_change FROM reservation_withdrawal_records UNION - SELECT * FROM continuation_records + SELECT listing_state_change FROM continuation_records UNION - SELECT * FROM addition_and_reservation_records + SELECT listing_state_change FROM addition_and_reservation_records UNION - -- SUPPRESSIONS + -- SUPPRESSION SELECT r.listing_state_change || hstore( ARRAY[['change_type_name', 'SUPPRESSION']] ) AS listing_state_change FROM ( - SELECT * FROM continuation_records + SELECT listing_state_change FROM continuation_records UNION - SELECT * FROM addition_and_reservation_records + SELECT listing_state_change FROM addition_and_reservation_records ) r WHERE EXISTS ( SELECT TRUE - FROM listing_state_distance lsd - WHERE lsd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER + FROM listing_changes_distance lcd + WHERE lcd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER ) UNION - -- UNSUPPRESSIONS + -- UNSUPPRESSION SELECT r.listing_state_change || hstore( ARRAY[['change_type_name', 'UNSUPPRESSION']] @@ -772,6 +774,7 @@ $merge_listing_state_changes$ $merge_listing_state_changes$; +-- slow on staging - 2-3m for SELECT 2116404 rows DROP VIEW IF EXISTS stateful_listing_change_groups_dv CASCADE; CREATE OR REPLACE VIEW stateful_listing_change_groups_dv AS WITH RECURSIVE stateful_listing_change_groups AS ( @@ -811,7 +814,7 @@ CREATE OR REPLACE VIEW stateful_listing_change_groups_dv AS ORDER BY lc.effective_at )::INTEGER AS change_group_max_rank - FROM applicable_listing_changes_timeline_mv lc + FROM applicable_inherited_taxon_listing_changes_mv lc ORDER BY lc.taxon_party_timeline_id, lc.effective_at @@ -861,6 +864,7 @@ CREATE INDEX ON stateful_listing_change_groups_mv ( taxon_party_timeline_id, effective_at ); +-- 1-2m on staging DROP VIEW IF EXISTS complete_listing_changes_dv CASCADE; CREATE OR REPLACE VIEW complete_listing_changes_dv AS WITH to_list AS ( @@ -878,7 +882,7 @@ WITH to_list AS ( lc.listing_change_id ) AS effective_at, lc.change_type_name - FROM applicable_listing_changes_timeline_mv lc + FROM applicable_inherited_taxon_listing_changes_mv lc JOIN stateful_listing_change_groups_mv lcg ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id AND lcg.effective_at = lc.effective_at @@ -898,14 +902,15 @@ WITH to_list AS ( lc.listing_change_id ) AS effective_at, 'DELETION' AS change_type_name - FROM applicable_listing_changes_timeline_mv lc + FROM applicable_inherited_taxon_listing_changes_mv lc JOIN stateful_listing_change_groups_mv lcg ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id AND lcg.effective_at > lc.effective_at AND EXISTS ( SELECT TRUE FROM unnest(lcg.listing_state) AS group_listing_state - WHERE group_listing_state->'species_listing_id' = species_listing_id::TEXT + WHERE group_listing_state->'species_listing_id' = lc.species_listing_id::TEXT + AND group_listing_state->'rank_distance' = lc.rank_distance::TEXT AND group_listing_state->'change_type_name' = 'DELETION' ) UNION ALL @@ -923,7 +928,7 @@ WITH to_list AS ( lc.listing_change_id ) AS effective_at, 'SUPPRESSION' AS change_type_name - FROM applicable_listing_changes_timeline_mv lc + FROM applicable_inherited_taxon_listing_changes_mv lc JOIN stateful_listing_change_groups_mv lcg ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id AND lcg.effective_at >= lc.effective_at @@ -947,7 +952,7 @@ WITH to_list AS ( lc.listing_change_id ) AS effective_at, 'UNSUPPRESSION' AS change_type_name - FROM applicable_listing_changes_timeline_mv lc + FROM applicable_inherited_taxon_listing_changes_mv lc JOIN stateful_listing_change_groups_mv lcg ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id AND lcg.effective_at > lc.effective_at @@ -976,8 +981,7 @@ SELECT lc.original_taxon_concept_id, lc.original_listing_change_id, lc.rank_distance, - ct.id AS change_type_id, - ct.name AS change_type_name, + lx.change_type_name, lc.event_id, lx.effective_at, lc.is_current, @@ -990,13 +994,10 @@ SELECT lc.is_geographically_excluded, lc.is_taxonomically_excluded FROM - applicable_listing_changes_timeline_mv lc + applicable_inherited_taxon_listing_changes_mv lc JOIN to_list lx ON lx.taxon_party_timeline_id = lc.taxon_party_timeline_id AND lx.listing_change_id = lc.listing_change_id -JOIN change_types_view ct - ON ct.designation_id = lc.designation_id - AND ct.name = lx.change_type_name ORDER BY lc.taxon_party_timeline_id, lc.listing_change_id, @@ -1015,9 +1016,38 @@ CREATE INDEX ON complete_listing_changes_mv ( taxon_party_timeline_id, effective_at ); -SELECT * FROM complete_listing_changes_mv WHERE designation_id = 1 AND taxon_concept_id = ( - SELECT taxon_concept_id FROM complete_listing_changes_mv WHERE designation_id = 1 AND change_type_name = 'UNSUPPRESSION' -); -; --- select * from applicable_listing_changes_timeline_mv lc where taxon_concept_id = 6353 and designation_id = 1; +SELECT + change_type_name, min(taxon_concept_id), count(*) +FROM complete_listing_changes_mv +WHERE designation_id = 1 +group by 1; + +-- change_type_name | min | count +-- ------------------+-----+-------- +-- RESERVATION | 136 | 64610 +-- ADDITION | 50 | 590385 +-- SUPPRESSION | 186 | 14598 +-- UNSUPPRESSION | 788 | 305 +-- DELETION | 186 | 26388 + +-- SELECT +-- change_type_name, min(taxon_concept_id), count(*) +-- FROM cites_listing_changes_mview +-- group by 1; +-- +-- change_type_name | min | count +-- ------------------------+-----+-------- +-- ADDITION | 50 | 395506 +-- DELETION | 186 | 3645 +-- EXCEPTION | 395 | 2364 +-- RESERVATION | 136 | 42609 +-- RESERVATION_WITHDRAWAL | 136 | 40981 +-- (5 rows) + +-- SELECT * FROM complete_listing_changes_mv WHERE designation_id = 1 AND taxon_concept_id = ( +-- SELECT taxon_concept_id FROM complete_listing_changes_mv WHERE designation_id = 1 AND change_type_name = 'UNSUPPRESSION' +-- ); + + +-- select * from applicable_inherited_taxon_listing_changes_mv lc where taxon_concept_id = 6353 and designation_id = 1;