diff --git a/.rubocop.yml b/.rubocop.yml index 0d729d038..3e3cd3599 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -42,6 +42,10 @@ Layout/IndentationConsistency: Exclude: - 'db/migrate/*' +Style/ParenthesesAroundCondition: + Enabled: true + AllowInMultilineConditions: true + # Configuration parameters: EnforcedStyle, SupportedStyles. # SupportedStyles: symmetrical, new_line, same_line Layout/MultilineArrayBraceLayout: @@ -109,7 +113,6 @@ Layout/ExtraSpacing: Exclude: - 'db/migrate/*' - # Detect hard tabs, no hard tabs. Layout/IndentationStyle: Enabled: true diff --git a/app/models/document.rb b/app/models/document.rb index 5ef7d16cc..0d0e0bfe6 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -56,27 +56,27 @@ class Document < ApplicationRecord include PgSearch::Model ACCEPTED_CONTENT_TYPES = [ - "image/jpeg", # jpg - "image/jpeg", # jpeg - "image/gif", # gif - "image/png", # png - "image/bmp", # bmp - "image/tiff", # tif - "image/tiff", # tiff - "application/vnd.ms-powerpoint", # ppt - "application/vnd.openxmlformats-officedocument.presentationml.presentation", # pptx - "application/vnd.ms-excel", # xls - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # xlsx - "application/rtf", # rtf - "text/plain", # txt - "application/msword", # doc - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # docx - "application/pdf", # pdf - "text/csv", # csv - "text/tab-separated-values", # tsv - "application/vnd.oasis.opendocument.text", # odt - "application/vnd.oasis.opendocument.spreadsheet", # ods - "application/vnd.oasis.opendocument.presentation" # odp + 'image/jpeg', # jpg + 'image/jpeg', # jpeg + 'image/gif', # gif + 'image/png', # png + 'image/bmp', # bmp + 'image/tiff', # tif + 'image/tiff', # tiff + 'application/vnd.ms-powerpoint', # ppt + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', # pptx + 'application/vnd.ms-excel', # xls + 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # xlsx + 'application/rtf', # rtf + 'text/plain', # txt + 'application/msword', # doc + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # docx + 'application/pdf', # pdf + 'text/csv', # csv + 'text/tab-separated-values', # tsv + 'application/vnd.oasis.opendocument.text', # odt + 'application/vnd.oasis.opendocument.spreadsheet', # ods + 'application/vnd.oasis.opendocument.presentation' # odp ].freeze pg_search_scope :search_by_title, against: :title, diff --git a/app/models/listing_change.rb b/app/models/listing_change.rb index 8bc47e2c2..1e5250cb7 100644 --- a/app/models/listing_change.rb +++ b/app/models/listing_change.rb @@ -171,6 +171,7 @@ def duplicates(comparison_attributes_override = {}) comparison_attributes.merge(comparison_attributes_override.symbolize_keys) ) ) + if party_listing_distribution relation = relation.includes(:party_listing_distribution).references(:party_listing_distribution).where( party_listing_distribution.comparison_conditions( @@ -178,11 +179,13 @@ def duplicates(comparison_attributes_override = {}) ) ) end + if annotation relation = relation.includes(:annotation).references(:annotation).where( annotation.comparison_conditions ) end + relation end @@ -223,22 +226,11 @@ def event_designation_mismatch end end - def listing_change_before_save_callback - # check if annotation should be deleted - if annotation && - annotation.short_note_en.blank? && - annotation.short_note_fr.blank? && - annotation.short_note_es.blank? && - annotation.full_note_en.blank? && - annotation.full_note_fr.blank? && - annotation.full_note_es.blank? - ann = annotation - self.annotation = nil - if ann.reload.listing_changes.empty? 
-        ann.delete
-      end
-    end
-
+  ##
+  # Called before save: if either excluded_geo_entities_ids or
+  # excluded_taxon_concepts_ids is set, create or replace ListingChanges with
+  # type `EXCEPTION` linked to this ListingChange (as the parent) accordingly.
+  def populate_exceptions_from_exclusions
     original_change_type = ChangeType.find(change_type_id)
 
     @excluded_geo_entities_ids = @excluded_geo_entities_ids &&
@@ -251,7 +243,6 @@ def listing_change_before_save_callback
 
     return self if @excluded_geo_entities_ids.nil? && @excluded_taxon_concepts_ids.nil?
 
-    new_exclusions = []
     exclusion_change_type = ChangeType.find_by(
       name: ChangeType::EXCEPTION, designation_id: original_change_type.designation_id
     )
@@ -259,26 +250,62 @@ def listing_change_before_save_callback
     # geographic exclusions
     excluded_geo_entities =
       if @excluded_geo_entities_ids.present?
-        new_exclusions << ListingChange.new(
-          change_type_id: exclusion_change_type.id,
-          species_listing_id: species_listing_id,
-          taxon_concept_id: taxon_concept_id,
-          geo_entity_ids: @excluded_geo_entities_ids
-        )
+        [
+          ListingChange.new(
+            change_type_id: exclusion_change_type.id,
+            species_listing_id: species_listing_id,
+            taxon_concept_id: taxon_concept_id,
+            geo_entity_ids: @excluded_geo_entities_ids,
+            effective_at: effective_at
+          )
+        ]
+      else
+        []
       end
 
     # taxonomic exclusions
     excluded_taxon_concepts =
       if @excluded_taxon_concepts_ids.present?
         @excluded_taxon_concepts_ids.map do |id|
-          new_exclusions << ListingChange.new(
+          ListingChange.new(
             change_type_id: exclusion_change_type.id,
             species_listing_id: species_listing_id,
-            taxon_concept_id: id
+            taxon_concept_id: id,
+            effective_at: effective_at
           )
         end
+      else
+        []
       end
 
-    self.exclusions = new_exclusions
+    self.exclusions = excluded_taxon_concepts + excluded_geo_entities
+
+    self
+  end
+
+  ##
+  # Called before save: detach an annotation whose notes are all blank; delete it if unused
+  def delete_empty_annotation
+    if (
+      annotation &&
+      annotation.short_note_en.blank? &&
+      annotation.short_note_fr.blank?
&&
+      annotation.short_note_es.blank? &&
+      annotation.full_note_en.blank? &&
+      annotation.full_note_fr.blank? &&
+      annotation.full_note_es.blank?
+    )
+      ann = annotation
+      self.annotation = nil
+
+      if ann.reload.listing_changes.empty?
+        ann.delete
+      end
+    end
+  end
+
+  def listing_change_before_save_callback
+    delete_empty_annotation
+    populate_exceptions_from_exclusions
   end
 end
diff --git a/app/models/user.rb b/app/models/user.rb
index 66fcf5e2c..956ac9532 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -116,11 +116,13 @@ def can_be_deleted?
       Reference, TaxonConceptReference, DistributionReference,
       Trade::AnnualReportUpload, Trade::Shipment
     ]
+
     for i in 0..tracked_objects.length - 1
       if tracked_objects[i].where([ 'created_by_id = :id OR updated_by_id = :id', id: self.id ]).limit(1).count > 0
         return false
       end
     end
+
     true
   end
 
@@ -150,19 +152,25 @@ def send_devise_notification(notification, *)
   def sync_with_captive_breeding_db
     # Only interested if role, name, encrypted_password, and email is changed.
     # Or user deleted.
-    return unless (previous_changes.keys & %w[email role name encrypted_password]).present? || destroyed?
+    return unless
+      previous_changes.keys.intersect?(
+        %w[email role name encrypted_password]
+      ) || destroyed?
 
     role_was = previous_changes['role']&.first
+
     action =
       if destroyed? # User record deleted.
         :delete
       elsif is_elibrary_user? || is_manager? # Is admin or elibrary.
         :create_or_update
-      elsif role_was == MANAGER || role_was == ELIBRARY_USER # Was admin or elibrary.
+ elsif role_was == MANAGER || role_was == ELIBRARY_USER # rubocop:disable Lint/DuplicateBranch + # Was admin or elibrary, but (because previous condition not met), is not any more :delete else :none end + return if action == :none email_was = previous_changes['email']&.first @@ -180,6 +188,7 @@ def sync_with_captive_breeding_db CaptiveBreedingUser.create!(email:, name:, encrypted_password:) else # Update the first CB user record, which is using the new email address (if changed). existing_cb_users.first.update!(email:, name:, encrypted_password:) + if existing_cb_users[1].present? # Duplicate user!? Remove it? # TODO: Do not have requirement for this yet, not sure is it safe to delete. # https://unep-wcmc.codebasehq.com/projects/cites-support-maintenance/tickets/241 diff --git a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql index d9d8ec975..1d8633a5f 100644 --- a/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql +++ b/db/mviews/003_rebuild_designation_all_listing_changes_mview.sql @@ -1,27 +1,133 @@ - DROP FUNCTION IF EXISTS rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations - ); - CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview( - taxonomy taxonomies, designation designations, events_ids INT[] - ) RETURNS void - LANGUAGE plpgsql - AS $$ - DECLARE - all_lc_table_name TEXT; - tmp_lc_table_name TEXT; - tc_table_name TEXT; - sql TEXT; - BEGIN - SELECT listing_changes_mview_name('all', designation.name, events_ids) - INTO all_lc_table_name; - SELECT listing_changes_mview_name('tmp', designation.name, events_ids) - INTO tmp_lc_table_name; - - SELECT LOWER(taxonomy.name) || '_taxon_concepts_and_ancestors_view' INTO tc_table_name; +CREATE OR REPLACE VIEW all_listing_changes_view AS + -- affected_taxon_concept is a taxon concept that is affected by this listing + -- change, even though it might not have an 
explicit connection to it + -- (i.e. it is an ancestor's listing change). + WITH designations_and_intervals AS ( + SELECT + designations.id designation_id, + designations.name designation_name, + designations.taxonomy_id taxonomy_id, + intervals.start_date interval_start_date, + intervals.end_date interval_end_date, + intervals.events_ids interval_events_ids + FROM designations + LEFT JOIN eu_regulations_applicability_view intervals + ON designations.name = 'EU' + ), listing_changes_with_exceptions AS ( + -- the purpose of this CTE is to aggregate excluded taxon concept ids + SELECT + listing_changes.id, + change_types.designation_id, + change_types.name AS change_type_name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current, + ARRAY_AGG_NOTNULL(taxonomic_exceptions.taxon_concept_id) AS excluded_taxon_concept_ids + FROM listing_changes + LEFT JOIN listing_changes taxonomic_exceptions + ON listing_changes.id = taxonomic_exceptions.parent_id + AND listing_changes.taxon_concept_id != taxonomic_exceptions.taxon_concept_id + JOIN change_types ON change_types.id = listing_changes.change_type_id + JOIN designations_and_intervals + ON designations_and_intervals.designation_id = change_types.designation_id + AND ( + designations_and_intervals IS NULL + OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL + OR listing_changes.event_id = ANY(designations_and_intervals.interval_events_ids) + ) + GROUP BY + listing_changes.id, + change_types.designation_id, + change_types.name, + listing_changes.taxon_concept_id, + listing_changes.species_listing_id, + listing_changes.change_type_id, + listing_changes.inclusion_taxon_concept_id, + listing_changes.event_id, + listing_changes.effective_at::DATE, + listing_changes.is_current + ), aggregate_lc AS ( + -- the purpose 
of this CTE is to aggregate listed and excluded populations + SELECT lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + party_distribution.geo_entity_id AS party_id, + ARRAY_AGG_NOTNULL(listing_distributions.geo_entity_id) AS listed_geo_entities_ids, + ARRAY_AGG_NOTNULL(excluded_distributions.geo_entity_id) AS excluded_geo_entities_ids + FROM listing_changes_with_exceptions lc + LEFT JOIN listing_distributions + ON lc.id = listing_distributions.listing_change_id + AND NOT listing_distributions.is_party + LEFT JOIN listing_distributions party_distribution + ON lc.id = party_distribution.listing_change_id + AND party_distribution.is_party + LEFT JOIN listing_changes population_exceptions + ON lc.id = population_exceptions.parent_id + AND lc.taxon_concept_id = population_exceptions.taxon_concept_id + LEFT JOIN listing_distributions excluded_distributions + ON population_exceptions.id = excluded_distributions.listing_change_id + AND NOT excluded_distributions.is_party + GROUP BY + lc.id, + lc.designation_id, + lc.change_type_name, + lc.taxon_concept_id, + lc.species_listing_id, + lc.change_type_id, + lc.inclusion_taxon_concept_id, + lc.event_id, + lc.effective_at, + lc.is_current, + party_distribution.geo_entity_id, + lc.excluded_taxon_concept_ids +) +SELECT + lc.*, + tc.taxon_concept_id AS affected_taxon_concept_id, + -- Make the tree distance reflect distance from inclusion + -- TODO TEST Rhinopittecus roxellana + COALESCE(itc.tree_distance, tc.tree_distance) tree_distance, + -- the following ROW_NUMBER call will assign chronological order to listing changes + -- in scope of the affected taxon concept and a particular designation + ROW_NUMBER() OVER ( + PARTITION BY tc.taxon_concept_id, designation_id + ORDER BY effective_at, + CASE + WHEN change_type_name = 'DELETION' THEN 0 + WHEN 
change_type_name = 'RESERVATION_WITHDRAWAL' THEN 1 + WHEN change_type_name = 'ADDITION' THEN 2 + WHEN change_type_name = 'RESERVATION' THEN 3 + WHEN change_type_name = 'EXCEPTION' THEN 4 + END, + tc.tree_distance + -- ??? OR would it be better to + -- COALESCE(itc.tree_distance, tc.tree_distance) tree_distance + )::INT AS timeline_position +FROM aggregate_lc lc +JOIN taxon_concepts_and_ancestors_mview tc + ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id +LEFT JOIN taxon_concepts_and_ancestors_mview itc + ON lc.inclusion_taxon_concept_id = itc.ancestor_taxon_concept_id + AND lc.taxon_concept_id = itc.taxon_concept_id +; - EXECUTE 'DROP TABLE IF EXISTS ' || tmp_lc_table_name || ' CASCADE'; +DROP FUNCTION IF EXISTS rebuild_designation_all_listing_changes_mview( + taxonomy taxonomies, designation designations +); - sql := 'CREATE TEMP TABLE ' || tmp_lc_table_name || ' AS +CREATE OR REPLACE VIEW all_listing_changes_view AS -- affected_taxon_concept -- is a taxon concept that is affected by this listing change, -- even though it might not have an explicit connection to it -- (i.e. 
it''s an ancestor''s listing change)
@@ -99,63 +205,87 @@
       lc.effective_at,
       lc.is_current,
       party_distribution.geo_entity_id,
-      lc.excluded_taxon_concept_ids';
+      lc.excluded_taxon_concept_ids
+;
 
-    EXECUTE sql;
-
-    EXECUTE 'CREATE INDEX ON ' || tmp_lc_table_name || ' (taxon_concept_id)';
-    -- for the current listing calculation
-    EXECUTE 'CREATE INDEX ON ' || tmp_lc_table_name || ' (taxon_concept_id, is_current, change_type_name, inclusion_taxon_concept_id)';
-
-    EXECUTE 'DROP TABLE IF EXISTS ' || all_lc_table_name || ' CASCADE';
-
-    sql := 'CREATE TEMP TABLE ' || all_lc_table_name || ' AS
-    SELECT
-      lc.*,
-      tc.taxon_concept_id AS affected_taxon_concept_id,
-      tc.tree_distance,
-      -- the following ROW_NUMBER call will assign chronological order to listing changes
-      -- in scope of the affected taxon concept and a particular designation
-      ROW_NUMBER() OVER (
-        PARTITION BY tc.taxon_concept_id, designation_id
-        ORDER BY effective_at,
-        CASE
-          WHEN change_type_name = ''DELETION'' THEN 0
-          WHEN change_type_name = ''RESERVATION_WITHDRAWAL'' THEN 1
-          WHEN change_type_name = ''ADDITION'' THEN 2
-          WHEN change_type_name = ''RESERVATION'' THEN 3
-          WHEN change_type_name = ''EXCEPTION'' THEN 4
-        END,
-        tree_distance
-      )::INT AS timeline_position
-    FROM ' || tmp_lc_table_name || ' lc
-    JOIN ' || tc_table_name || ' tc
-    ON lc.taxon_concept_id = tc.ancestor_taxon_concept_id';
-
-    EXECUTE sql;
+CREATE OR REPLACE FUNCTION rebuild_designation_all_listing_changes_mview(
+  taxonomy taxonomies,
+  designation designations,
+  events_ids INT[]
+) RETURNS void
+  LANGUAGE plpgsql
+AS $rebuild_designation_all_listing_changes_mview$
+  DECLARE
+    all_lc_table_name TEXT;
+    tmp_lc_table_name TEXT;
+    tc_table_name TEXT;
+    sql TEXT;
+  BEGIN
+    SELECT listing_changes_mview_name('all', designation.name, events_ids)
+    INTO all_lc_table_name;
+    SELECT listing_changes_mview_name('tmp', designation.name, events_ids)
+    INTO tmp_lc_table_name;
 
-    EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (designation_id, timeline_position, affected_taxon_concept_id)';
-    EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (affected_taxon_concept_id, inclusion_taxon_concept_id)';
-    EXECUTE 'CREATE INDEX ON ' || all_lc_table_name || ' (id, affected_taxon_concept_id)';
+    SELECT LOWER(taxonomy.name) || '_taxon_concepts_and_ancestors_view'
+    INTO tc_table_name;
 
-    -- make the tree distance reflect distance from inclusion (Rhinopittecus roxellana)
-    sql := 'UPDATE ' || all_lc_table_name
-    || ' SET tree_distance = tc.tree_distance
-    FROM ' || all_lc_table_name || ' alc
-    JOIN ' || tc_table_name || ' tc
-    ON alc.inclusion_taxon_concept_id = tc.ancestor_taxon_concept_id
-    AND alc.affected_taxon_concept_id = tc.taxon_concept_id
-    WHERE alc.id = ' || all_lc_table_name || '.id
-    AND alc.affected_taxon_concept_id = ' || all_lc_table_name || '.affected_taxon_concept_id';
+    -- First, build the temp table
+    EXECUTE format(
+      $format$
+        CREATE TABLE %I AS
+        SELECT *
+        FROM all_listing_changes_view
+        WHERE designation_id = %L
+        %s
+      $format$,
+      tmp_lc_table_name,
+      designation.id,
+      CASE
+        WHEN array_length(events_ids, 1) > 0
+        THEN format(
+          'AND event_ids = %L::INT[]', -- NOTE(review): the view defined in this patch exposes event_id, not event_ids; confirm the predicate
+          events_ids
+        )
+        ELSE ''
+      END
+    );
 
-    EXECUTE sql;
+    -- Then, drop the old table and swap in the temp table.
+    EXECUTE format(
+      $format$
+        DROP TABLE IF EXISTS %1$I CASCADE;
+        ALTER TABLE %2$I RENAME TO %1$I;
+      $format$,
+      all_lc_table_name,
+      tmp_lc_table_name
+    );
+    EXECUTE format(
+      $format$
+        CREATE INDEX ON %1$I (taxon_concept_id) WHERE is_current;
+        CREATE INDEX ON %1$I (taxon_concept_id);
+        CREATE INDEX ON %1$I (
+          taxon_concept_id,
+          is_current,
+          change_type_name,
+          inclusion_taxon_concept_id
+        );
+        CREATE INDEX ON %1$I (designation_id, timeline_position, affected_taxon_concept_id);
+        CREATE INDEX ON %1$I (affected_taxon_concept_id, inclusion_taxon_concept_id);
+        CREATE INDEX ON %1$I (id, affected_taxon_concept_id);
+        CREATE INDEX ON %1$I (affected_taxon_concept_id, id);
+      $format$,
+      all_lc_table_name
+    );
   END;
-  $$;
+$rebuild_designation_all_listing_changes_mview$;
 
-  COMMENT ON FUNCTION rebuild_designation_all_listing_changes_mview(
-    taxonomy taxonomies, designation designations, events_ids INT[]
-  ) IS
-  'Procedure to create a helper table with all listing changes
-  + their included / excluded populations
-  + tree distance between affected taxon concept and the taxon concept this listing change applies to.';
+COMMENT ON FUNCTION rebuild_designation_all_listing_changes_mview(
+  taxonomy taxonomies,
+  designation designations,
+  events_ids INT[]
+) IS $comment$
+Procedure to create a helper table with all listing changes
++ their included / excluded populations
++ tree distance between affected taxon concept and the taxon concept this listing change applies to.
+$comment$; diff --git a/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql b/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql index 6263cd92a..ddf5e3983 100644 --- a/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql +++ b/db/mviews/004a_cites_eu_applicable_listing_changes_for_node.sql @@ -1,3 +1,196 @@ +-- Todo: Handle `$1` + +WITH RECURSIVE listing_changes_timeline AS ( + SELECT lc.id, + designation_id, + affected_taxon_concept_id AS original_taxon_concept_id, + taxon_concept_id AS current_taxon_concept_id, + CASE -- context + WHEN inclusion_taxon_concept_id IS NULL + THEN HSTORE(species_listing_id::TEXT, taxon_concept_id::TEXT) + ELSE HSTORE(species_listing_id::TEXT, inclusion_taxon_concept_id::TEXT) + END AS context, + inclusion_taxon_concept_id, + species_listing_id, + change_type_id, + event_id, + effective_at, + tree_distance AS context_tree_distance, + timeline_position, + CASE -- is_applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND NOT listed_geo_entities_ids && taxon_concepts_mview.countries_ids_ary + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND excluded_geo_entities_ids @> taxon_concepts_mview.countries_ids_ary + ) + THEN FALSE + WHEN ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND excluded_taxon_concept_ids && ARRAY[ + affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN 
FALSE
+      ELSE TRUE
+    END AS is_applicable
+  FROM all_taxon_listing_changes_view lc
+  JOIN cites_eu_tmp_taxon_concepts_mview taxon_concepts_mview
+  ON lc.affected_taxon_concept_id = taxon_concepts_mview.id
+  WHERE timeline_position = 1
+  -- AND lc.affected_taxon_concept_id = $1
+
+  UNION
+
+  SELECT hi.id,
+    hi.designation_id,
+    listing_changes_timeline.original_taxon_concept_id,
+    hi.taxon_concept_id,
+    CASE -- context
+      WHEN hi.inclusion_taxon_concept_id IS NOT NULL
+        AND (
+          AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT]
+          OR listing_changes_timeline.context = ''::HSTORE
+        )
+      THEN HSTORE(hi.species_listing_id::TEXT, hi.inclusion_taxon_concept_id::TEXT)
+      WHEN change_types.name = 'DELETION'
+        AND hi.taxon_concept_id = hi.affected_taxon_concept_id
+      THEN listing_changes_timeline.context - ARRAY[hi.species_listing_id::TEXT]
+      WHEN change_types.name = 'DELETION'
+      THEN listing_changes_timeline.context - HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT)
+      -- if it is a new listing at closer level that replaces an older listing, wipe out the context
+      WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance
+        AND hi.effective_at > listing_changes_timeline.effective_at
+        AND change_types.name = 'ADDITION'
+      THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT)
+      -- if it is a same day split listing we don't want to wipe the other part of the split from the context
+      WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance
+        AND change_types.name = 'ADDITION'
+      THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT)
+      WHEN hi.tree_distance <= listing_changes_timeline.context_tree_distance
+        AND hi.affected_taxon_concept_id = hi.taxon_concept_id
+        AND change_types.name = 'ADDITION'
+      THEN HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT)
+      -- changing this to <= breaks Ursus arctos isabellinus (NOTE(review): the WHEN below already uses <= -- confirm which comparison is intended)
+      WHEN hi.tree_distance <=
listing_changes_timeline.context_tree_distance + AND change_types.name = 'ADDITION' + THEN listing_changes_timeline.context || HSTORE(hi.species_listing_id::TEXT, hi.taxon_concept_id::TEXT) + ELSE listing_changes_timeline.context + END AS context, + hi.inclusion_taxon_concept_id, + hi.species_listing_id, + hi.change_type_id, + hi.event_id, + hi.effective_at, + CASE -- context_tree_distance + WHEN ( + hi.inclusion_taxon_concept_id IS NOT NULL + AND AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + ) OR hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN hi.tree_distance + ELSE listing_changes_timeline.context_tree_distance + END AS context_tree_distance, + hi.timeline_position, + CASE -- is applicable + WHEN ( + -- there are listed populations + ARRAY_UPPER(hi.listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND NOT hi.listed_geo_entities_ids && taxon_concepts_mview.countries_ids_ary + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(hi.excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(taxon_concepts_mview.countries_ids_ary, 1) IS NOT NULL + AND hi.excluded_geo_entities_ids @> taxon_concepts_mview.countries_ids_ary + ) + THEN FALSE + WHEN ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND hi.excluded_taxon_concept_ids && ARRAY[ + hi.affected_taxon_concept_id, + taxon_concepts_mview.kingdom_id, + taxon_concepts_mview.phylum_id, + taxon_concepts_mview.class_id, + taxon_concepts_mview.order_id, + taxon_concepts_mview.family_id, + taxon_concepts_mview.genus_id, + taxon_concepts_mview.species_id + ] + THEN FALSE + WHEN listing_changes_timeline.context -> hi.species_listing_id::TEXT = hi.taxon_concept_id::TEXT + OR hi.taxon_concept_id = 
listing_changes_timeline.original_taxon_concept_id + -- this line to make Moschus leucogaster happy + OR AVALS(listing_changes_timeline.context) @> ARRAY[hi.taxon_concept_id::TEXT] + THEN TRUE + WHEN listing_changes_timeline.context = ''::HSTORE --this would be the case when deleted + AND ( + ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NOT NULL + AND NOT hi.excluded_taxon_concept_ids && ARRAY[hi.affected_taxon_concept_id] + OR ARRAY_UPPER(hi.excluded_taxon_concept_ids, 1) IS NULL + ) + AND hi.inclusion_taxon_concept_id IS NULL + AND hi.change_type_name = 'ADDITION' + THEN TRUE -- allows for re-listing + WHEN hi.tree_distance < listing_changes_timeline.context_tree_distance + THEN TRUE + ELSE FALSE + END AS is_applicable + FROM all_taxon_listing_changes_view hi + JOIN listing_changes_timeline + ON hi.designation_id = listing_changes_timeline.designation_id + AND listing_changes_timeline.original_taxon_concept_id = hi.affected_taxon_concept_id + AND listing_changes_timeline.timeline_position + 1 = hi.timeline_position + JOIN change_types + ON hi.change_type_id = change_types.id + JOIN cites_eu_tmp_taxon_concepts_mview taxon_concepts_mview + ON hi.affected_taxon_concept_id = taxon_concepts_mview.id +) +SELECT listing_changes_timeline.id +FROM listing_changes_timeline +WHERE is_applicable +ORDER BY timeline_position +; + + + + + + + + + + + + + + + + + + + + + + + + + + + + DROP FUNCTION IF EXISTS cites_eu_applicable_listing_changes_for_node(designation_name TEXT, node_id INT); CREATE OR REPLACE FUNCTION cites_eu_applicable_listing_changes_for_node(all_listing_changes_mview TEXT, node_id INT) RETURNS SETOF INT diff --git a/db/mviews/005_rebuild_designation_listing_changes_mview.sql b/db/mviews/005_rebuild_designation_listing_changes_mview.sql index cc973e3b6..f981cfc36 100644 --- a/db/mviews/005_rebuild_designation_listing_changes_mview.sql +++ b/db/mviews/005_rebuild_designation_listing_changes_mview.sql @@ -35,15 +35,15 @@ CREATE OR REPLACE FUNCTION 
rebuild_designation_listing_changes_mview( deletion_id INT; BEGIN SELECT listing_changes_mview_name('all', designation.name, events_ids) - INTO all_lc_table_name; + INTO all_lc_table_name; SELECT listing_changes_mview_name('tmp', designation.name, events_ids) - INTO raw_lc_table_name; + INTO raw_lc_table_name; SELECT listing_changes_mview_name('tmp_cascaded', designation.name, events_ids) - INTO tmp_lc_table_name; + INTO tmp_lc_table_name; SELECT listing_changes_mview_name('child', designation.name, events_ids) - INTO lc_table_name; + INTO lc_table_name; SELECT listing_changes_mview_name(NULL, designation.name, events_ids) - INTO master_lc_table_name; + INTO master_lc_table_name; RAISE INFO 'Creating %', tmp_lc_table_name; diff --git a/idea-refactor-cascade.sql b/idea-refactor-cascade.sql new file mode 100644 index 000000000..1b1a5f0b8 --- /dev/null +++ b/idea-refactor-cascade.sql @@ -0,0 +1,1053 @@ +BEGIN; +DROP VIEW IF EXISTS taxon_ancestors_dv CASCADE; +CREATE OR REPLACE VIEW taxon_ancestors_dv AS + WITH RECURSIVE ancestries AS ( + -- start with the root nodes + SELECT + "taxonomy_id", + "id", + "rank_id", + '{}'::BIGINT[] AS "ancestor_ids" + FROM "taxon_concepts" roots + WHERE "parent_id" IS NULL + -- It turns out that some non-A names have ancestries + -- AND "name_status" = 'A' + UNION ALL + SELECT + "child"."taxonomy_id", + "child"."id", + "child"."rank_id", + "parent"."ancestor_ids" || ARRAY["parent"."id"::BIGINT] + FROM "ancestries" parent + JOIN "taxon_concepts" child + ON "child"."taxonomy_id" = "parent"."taxonomy_id" + AND "child"."parent_id" = "parent"."id" + ), taxon_ancestors AS ( + SELECT + "taxonomy_id", "id", "rank_id", + unnest(ancestor_ids) AS ancestor_id, + "ancestor_ids" + FROM "ancestries" + ), rank_depths AS ( + SELECT + "id" AS "rank_id", + row_number() OVER() AS "rank_depth" + FROM ( + SELECT ( + '{' || translate(taxonomic_position, '.', ',') || '}' + )::INTEGER[], + * + FROM ranks + ORDER BY 1 + ) r + ), rank_distances AS ( + SELECT + 
"ancestor_rank"."rank_id" ancestor_rank_id, + "ancestor_rank"."rank_depth" ancestor_rank_depth, + "descendant_rank"."rank_id" descendant_rank_id, + "descendant_rank"."rank_depth" descendant_rank_depth, + "descendant_rank"."rank_depth" - "ancestor_rank"."rank_depth" AS rank_distance + FROM "rank_depths" ancestor_rank + JOIN "rank_depths" descendant_rank + ON "ancestor_rank"."rank_depth" <= "descendant_rank"."rank_depth" + ) + SELECT + "ta"."taxonomy_id", + "ta"."id", + "ta"."rank_id", + "ta"."ancestor_ids", + "ta"."ancestor_ids"[( + array_position("ta"."ancestor_ids", "ta"."ancestor_id") + ):] "path_ids", + "ta"."ancestor_id", + "ancestor_rank_id", + "ancestor_rank_depth", + "descendant_rank_depth" AS "rank_depth", + "rd"."rank_distance" + FROM "taxon_ancestors" ta + JOIN "taxon_concepts" atc + ON "ancestor_id" = "atc"."id" + JOIN "rank_distances" rd + ON "rd"."descendant_rank_id" = "ta"."rank_id" + AND "rd"."ancestor_rank_id" = "atc"."rank_id" +UNION ALL + SELECT + "ta"."taxonomy_id", + "ta"."id", + "ta"."rank_id", + "ta"."ancestor_ids", + '{}' AS "path_ids", + "ta"."id" AS "ancestor_id", + "ta"."rank_id" AS "ancestor_rank_id", + "descendant_rank_depth" AS "ancestor_rank_depth", + "descendant_rank_depth" AS "rank_depth", + 0 AS "rank_distance" + FROM "taxon_ancestors" ta + JOIN "rank_distances" rd + ON "rd"."descendant_rank_id" = "ta"."rank_id" + AND "rd"."ancestor_rank_id" = "ta"."rank_id" + +; + +CREATE MATERIALIZED VIEW taxon_ancestors_mv + AS SELECT * FROM taxon_ancestors_dv +; + +CREATE INDEX ON taxon_ancestors_mv ( + id, ancestor_id +); + +CREATE INDEX ON taxon_ancestors_mv ( + id, rank_distance +); + +CREATE INDEX ON taxon_ancestors_mv ( + id, ancestor_rank_depth +); + +CREATE INDEX ON taxon_ancestors_mv ( + id, ancestor_rank_id +); + +DROP VIEW IF EXISTS change_types_view CASCADE; +CREATE OR REPLACE VIEW change_types_view AS +SELECT + ct.*, + CASE + WHEN ct.name = 'RESERVATION_WITHDRAWAL' THEN 1 + WHEN ct.name = 'DELETION' THEN 2 + WHEN ct.name = 
'EXCEPTION' THEN 3 + WHEN ct.name = 'ADDITION' THEN 4 + WHEN ct.name = 'RESERVATION' THEN 5 + END change_type_rank, + CASE + WHEN ct.name IN ('ADDITION', 'DELETION', 'EXCEPTION') THEN 1 + ELSE 2 + END change_type_group_id -- A/D/X, R/W +FROM change_types ct; + +DROP VIEW IF EXISTS implied_listing_changes_view CASCADE; +CREATE OR REPLACE VIEW implied_listing_changes_view AS +-- affected_taxon_concept is a taxon concept that is affected by this listing +-- change, even though it might not have an explicit connection to it +-- (i.e. it is an ancestor's listing change). +WITH designations_and_intervals AS ( + SELECT + designations.id AS designation_id, + designations.name AS designation_name, + designations.taxonomy_id AS taxonomy_id, + intervals.start_date AS interval_start_date, + intervals.end_date AS interval_end_date, + intervals.events_ids AS interval_events_ids + FROM designations + LEFT JOIN eu_regulations_applicability_view intervals + ON designations.name = 'EU' +), listing_changes_with_exclusions AS ( + -- the purpose of this CTE is to aggregate excluded taxon concept ids + SELECT + lc.id, + ct.designation_id, + designations_and_intervals.interval_events_ids, + lc.taxon_concept_id, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + ct.id AS change_type_id, + ct.change_type_rank AS change_type_rank, + ct.name AS change_type_name, + lc.event_id, + -- A bug exists where EXCLUSIONS have `effective_at='2012-09-21 07:32:20'`, + -- instead of that of the parent. 
+ COALESCE(included_lc.effective_at, lc.effective_at)::DATE AS effective_at, + lc.is_current, + ARRAY_AGG_NOTNULL(taxonomic_exclusions.taxon_concept_id) AS excluded_taxon_concept_ids + FROM listing_changes lc + LEFT JOIN listing_changes taxonomic_exclusions + ON lc.id = taxonomic_exclusions.parent_id + AND lc.taxon_concept_id != taxonomic_exclusions.taxon_concept_id + JOIN change_types_view ct ON ct.id = lc.change_type_id + JOIN designations_and_intervals + ON designations_and_intervals.designation_id = ct.designation_id + AND ( + designations_and_intervals IS NULL + OR ARRAY_UPPER(designations_and_intervals.interval_events_ids, 1) IS NULL + OR lc.event_id = ANY(designations_and_intervals.interval_events_ids) + ) + LEFT JOIN listing_changes included_lc + ON lc.parent_id = included_lc.id + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +), aggregate_lc AS ( + -- the purpose of this CTE is to aggregate listed and excluded populations + -- All rows in this table will go into implied_listing_changes_view + SELECT + lc.id, + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + party_distribution.geo_entity_id AS party_id, + ARRAY_AGG_NOTNULL(listing_distributions.geo_entity_id) AS listed_geo_entities_ids, + ARRAY_AGG_NOTNULL(excluded_distributions.geo_entity_id) AS excluded_geo_entities_ids + FROM listing_changes_with_exclusions lc + LEFT JOIN listing_distributions + ON lc.id = listing_distributions.listing_change_id + AND NOT listing_distributions.is_party + LEFT JOIN listing_distributions party_distribution + ON lc.id = party_distribution.listing_change_id + AND party_distribution.is_party + LEFT JOIN listing_changes population_exclusions + ON lc.id = population_exclusions.parent_id + AND lc.taxon_concept_id = 
population_exclusions.taxon_concept_id + LEFT JOIN listing_distributions excluded_distributions + ON population_exclusions.id = excluded_distributions.listing_change_id + AND NOT excluded_distributions.is_party + GROUP BY + lc.id, + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, + lc.event_id, + lc.effective_at, + lc.is_current, + party_distribution.geo_entity_id, + lc.excluded_taxon_concept_ids +), addition_groups AS ( + SELECT + DISTINCT ON ( + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE + ) + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE AS effective_at, + hstore( + array_agg(ARRAY[species_listing_id, lc.id]::TEXT[]) FILTER ( + WHERE lc.change_type_name = 'ADDITION' + ) OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.effective_at::DATE + ORDER BY + species_listing_id + ) + ) AS additions_by_listing_id, + hstore( + -- todo: multiple listing changes per appendix is possible + array_agg(ARRAY[species_listing_id, lc.id]::TEXT[]) FILTER ( + WHERE lc.change_type_name = 'DELETION' + ) OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.effective_at::DATE + ORDER BY + species_listing_id + ) + ) AS deletions_by_listing_id, + CASE WHEN lc.change_type_name = 'ADDITION' + THEN dense_rank() OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.change_type_name + ORDER BY + lc.effective_at::DATE + )::INTEGER + END AS addition_group_rank, + CASE WHEN lc.change_type_name IN ('ADDITION', 'DELETION') + THEN dense_rank() OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id + ORDER BY + 
lc.effective_at::DATE + )::INTEGER + END AS add_del_group_rank + FROM aggregate_lc lc + WHERE lc.change_type_name IN ('ADDITION', 'DELETION') + ORDER BY + lc.designation_id, + lc.interval_events_ids, + lc.taxon_concept_id, + lc.party_id, + lc.effective_at::DATE +), synthetic_deletions_needed AS ( + -- TODO: make this recursive and stateful as we cannot rely on additions. + -- OR create synthetic additions instead? + SELECT DISTINCT + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.party_id, + ag.addition_group_rank, + ag.effective_at, + unnest(akeys(deletions_by_listing_id))::BIGINT AS species_listing_id, + unnest(avals(deletions_by_listing_id))::BIGINT AS deleted_listing_change_id + FROM ( + SELECT + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.addition_group_rank, + ag.party_id, + ag.effective_at::DATE AS effective_at, + ( + prev_ag.additions_by_listing_id + ) - COALESCE( + akeys(ag.deletions_by_listing_id), '{}'::text[] + ) - COALESCE( + array_agg( + (SELECT key FROM each(dg.deletions_by_listing_id)) + ) OVER ( + PARTITION BY + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.party_id, + ag.addition_group_rank + ), + '{}'::text[] + ) AS deletions_by_listing_id + FROM addition_groups ag + JOIN addition_groups prev_ag + ON ag.designation_id = prev_ag.designation_id + AND ag.interval_events_ids IS NOT DISTINCT FROM prev_ag.interval_events_ids + AND ag.taxon_concept_id = prev_ag.taxon_concept_id + AND ag.party_id IS NOT DISTINCT FROM prev_ag.party_id + AND ag.addition_group_rank = prev_ag.addition_group_rank + 1 + LEFT JOIN addition_groups dg + ON ag.designation_id = dg.designation_id + AND ag.interval_events_ids IS NOT DISTINCT FROM dg.interval_events_ids + AND ag.taxon_concept_id = dg.taxon_concept_id + AND ag.party_id IS NOT DISTINCT FROM dg.party_id + AND ag.add_del_group_rank > dg.add_del_group_rank + AND prev_ag.add_del_group_rank < dg.add_del_group_rank + ) AS ag +) +SELECT + 
ag.deleted_listing_change_id AS id, + ag.designation_id, + ag.interval_events_ids, + ag.taxon_concept_id, + ag.species_listing_id, + NULL AS inclusion_taxon_concept_id, + ct.id AS change_type_id, + ct.name AS change_type_name, + ct.change_type_rank AS change_type_rank, + NULL AS event_id, + ag.effective_at AS effective_at, + FALSE AS is_current, + '{}' AS excluded_taxon_concept_ids, + ag.party_id AS party_id, + '{}' AS listed_geo_entities_ids, + '{}' AS excluded_geo_entities_ids +FROM synthetic_deletions_needed ag +JOIN change_types_view ct + ON ct.designation_id = ag.designation_id + AND ct.name = 'DELETION' +UNION ALL +SELECT * FROM aggregate_lc; + +DROP VIEW IF EXISTS inherited_listing_changes_view CASCADE; +CREATE OR REPLACE VIEW inherited_listing_changes_view AS +SELECT DISTINCT + tc.id AS taxon_concept_id, + lc.id AS listing_change_id, + lc.designation_id, + lc.interval_events_ids, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.taxon_concept_id AS original_taxon_concept_id, + lc.id AS original_listing_change_id, + tc.rank_distance, + lc.change_type_id, + lc.change_type_name, + lc.change_type_rank, + lc.event_id, + lc.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + lc.party_id, + lc.listed_geo_entities_ids, + lc.excluded_geo_entities_ids, + -- The following dense_rank() call will assign a unique id to each combination + -- of affected taxon concept, designation, and party. 
+ dense_rank() OVER ( + ORDER BY + tc.id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id + )::BIGINT taxon_party_timeline_id, + -- The following dense_rank() call will assign chronological order to listing + -- changes in scope of the affected taxon concept and a particular + -- designation/party combination + dense_rank() OVER ( + PARTITION BY + lc.taxon_concept_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id + ORDER BY + lc.effective_at, + lc.change_type_rank, + tc.rank_distance DESC + )::INTEGER AS timeline_position +FROM implied_listing_changes_view lc +JOIN taxon_ancestors_mv tc + ON lc.taxon_concept_id = tc.ancestor_id +; + +DROP VIEW IF EXISTS taxon_concepts_with_distributions_and_ancestors CASCADE; +CREATE OR REPLACE VIEW taxon_concepts_with_distributions_and_ancestors AS +SELECT + tc.*, + AVALS(ancestor_fields)::INTEGER[] || tc.id AS ancestor_ids, + (ancestor_fields->'kingdom_id')::INTEGER AS kingdom_id, + (ancestor_fields->'phylum_id')::INTEGER AS phylum_id, + (ancestor_fields->'class_id')::INTEGER AS class_id, + (ancestor_fields->'order_id')::INTEGER AS order_id, + (ancestor_fields->'family_id')::INTEGER AS family_id, + (ancestor_fields->'subfamily_id')::INTEGER AS subfamily_id, + (ancestor_fields->'genus_id')::INTEGER AS genus_id, + (ancestor_fields->'species_id')::INTEGER AS species_id, + (ancestor_fields->'subspecies_id')::INTEGER AS subspecies_id, + td.geo_entity_ids +FROM taxon_concepts tc +JOIN ( + SELECT + ta.id, + hstore( + array_agg(ARRAY[lower(r.name) || '_id', ancestor_id::text]) + ) AS ancestor_fields, + hstore( + array_agg(ARRAY[rank_distance::text, ancestor_id::text]) + ) AS ancestor_id_by_distance + FROM taxon_ancestors_dv ta + JOIN ranks r ON ta.ancestor_rank_id = r.id + GROUP BY ta.id +) ta ON tc.id = ta.id +LEFT JOIN ( + SELECT + taxon_concept_id "id", + array_agg(geo_entity_id) AS geo_entity_ids + FROM distributions d + GROUP BY taxon_concept_id +) td ON tc.id = td.id; + + +-- this one is quite slow on 
staging - 2m for SELECT 2688636 +DROP VIEW IF EXISTS applicable_inherited_taxon_listing_changes_dv CASCADE; +CREATE OR REPLACE VIEW applicable_inherited_taxon_listing_changes_dv AS +SELECT + lc.*, + ( + -- there are listed populations + ARRAY_UPPER(listed_geo_entities_ids, 1) IS NOT NULL + -- and the taxon has its own distribution and does not occur in any of them + AND ARRAY_UPPER(tc.geo_entity_ids, 1) IS NOT NULL + AND NOT listed_geo_entities_ids && tc.geo_entity_ids + ) OR ( + -- when all populations are excluded + ARRAY_UPPER(excluded_geo_entities_ids, 1) IS NOT NULL + AND ARRAY_UPPER(tc.geo_entity_ids, 1) IS NOT NULL + AND excluded_geo_entities_ids @> tc.geo_entity_ids + ) AS is_geographically_excluded, + ( + ARRAY_UPPER(excluded_taxon_concept_ids, 1) IS NOT NULL + -- if taxon or any of its ancestors is excluded from this listing + AND excluded_taxon_concept_ids && tc.ancestor_ids + ) AS is_taxonomically_excluded +FROM inherited_listing_changes_view lc +JOIN taxon_concepts_with_distributions_and_ancestors tc + ON lc.taxon_concept_id = tc.id +UNION ALL +SELECT + -- At all descendants of an inclusion, add the listings at the higher level + -- which are current at the time of the listing, with `FALSE` values for both + -- `is_geographically_excluded` and `is_taxonomically_excluded`. 
+ includer.taxon_concept_id, + includer.listing_change_id, + includer.designation_id, + includer.interval_events_ids, + includer.species_listing_id, + includer.inclusion_taxon_concept_id, + included.taxon_concept_id AS original_taxon_concept_id, + included.id AS original_listing_change_id, + -- TODO TEST Rhinopittecus roxellana + includer.rank_distance, + includer.change_type_id, + includer.change_type_name, + includer.change_type_rank, + includer.event_id, + includer.effective_at, + included.is_current, + included.excluded_taxon_concept_ids, + includer.party_id, + included.listed_geo_entities_ids, + included.excluded_geo_entities_ids, + includer.taxon_party_timeline_id, + includer.timeline_position, + FALSE AS is_geographically_excluded, + FALSE is_taxonomically_excluded +FROM inherited_listing_changes_view includer +JOIN implied_listing_changes_view included + ON includer.inclusion_taxon_concept_id = included.taxon_concept_id + AND included.species_listing_id = includer.species_listing_id + AND included.change_type_name = 'ADDITION' + AND included.effective_at <= includer.effective_at + AND NOT EXISTS ( + SELECT TRUE + FROM implied_listing_changes_view deletion + WHERE included.taxon_concept_id = deletion.taxon_concept_id + AND included.species_listing_id = deletion.species_listing_id + AND included.effective_at < deletion.effective_at + AND includer.effective_at >= deletion.effective_at + ) +JOIN taxon_concepts_with_distributions_and_ancestors tc + ON includer.taxon_concept_id = tc.id +; + + +-- A timeline is identified by: +-- +-- * `taxon_concept_id` +-- * `designation_id` +-- * `interval_events_ids` +-- * `party_id` (important for e.g. Agapornis fischeri) +-- +-- A timeline can have one or more snapshots. 
+-- +-- * `taxon_concept_id` +-- * `listing_change_id` +-- * (`designation_id` is strictly redundant, dependent on `listing_change_id`) +-- * `interval_events_ids` +-- * `party_id` +-- * `timeline_position` + + +DROP TABLE IF EXISTS tmp_all_listing_changes_timeline_matview; +DROP TABLE IF EXISTS applicable_listing_changes_timeline_dt; +DROP TABLE IF EXISTS applicable_listing_changes_timeline_mt; + +CREATE MATERIALIZED VIEW applicable_inherited_taxon_listing_changes_mv AS + SELECT * FROM applicable_inherited_taxon_listing_changes_dv; + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_concept_id, designation_id, change_type_id, party_id, effective_at +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_party_timeline_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_concept_id, change_type_id, effective_at, original_taxon_concept_id +); +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + change_type_id, taxon_concept_id, original_taxon_concept_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + species_listing_id, taxon_concept_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_concept_id, designation_id, party_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + original_taxon_concept_id, + taxon_concept_id, + designation_id, + species_listing_id, + party_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_concept_id, effective_at, listing_change_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + listing_change_id, taxon_concept_id +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_party_timeline_id, change_type_name +); + +CREATE INDEX ON applicable_inherited_taxon_listing_changes_mv ( + taxon_party_timeline_id, effective_at +); + +-- target #- path +-- jsonb_insert(target, path, newval) +CREATE OR REPLACE 
FUNCTION jsonb_object_merge( + old_object JSONB, + new_object JSONB +) RETURNS JSONB LANGUAGE SQL AS +$jsonb_object_merge$ + SELECT jsonb_object_agg( + COALESCE(n.key, o.key), + CASE + WHEN jsonb_typeof(o.value) = 'object' AND jsonb_typeof(n.value) = 'object' + THEN jsonb_object_merge(o.value, n.value) + ELSE COALESCE(o.value, n.value) + END + ) + FROM jsonb_each(old_object) o + FULL OUTER JOIN jsonb_each(new_object) n ON o.key = n.key +$jsonb_object_merge$; + +CREATE OR REPLACE FUNCTION jsonb_object_omit( + original_object JSONB, + to_omit TEXT[] +) RETURNS JSONB LANGUAGE SQL AS +$jsonb_object_omit$ + SELECT jsonb_object_agg( + o.key, + o.value + ) FILTER ( + WHERE o.key != ALL(to_omit) -- keep a key only when it equals none of to_omit (ANY kept every key once to_omit had 2+ distinct entries) + ) + FROM jsonb_each(original_object) o +$jsonb_object_omit$; + +CREATE OR REPLACE FUNCTION merge_listing_state_changes( + initial_state hstore[], + state_change hstore[] +) RETURNS hstore[] LANGUAGE SQL AS +$merge_listing_state_changes$ + SELECT array_agg(DISTINCT final_state.listing_state_change) + FROM ( + WITH listing_state AS ( + SELECT + UNNEST(initial_state)->'change_type_name' AS change_type_name, + UNNEST(initial_state)->'rank_distance' AS rank_distance, + UNNEST(initial_state)->'species_listing_id' AS species_listing_id, + UNNEST(initial_state)->'listing_change_id' AS listing_change_id + ), listing_changes AS ( + SELECT + UNNEST(state_change)->'change_type_name' AS change_type_name, + UNNEST(state_change)->'rank_distance' AS rank_distance, + UNNEST(state_change)->'species_listing_id' AS species_listing_id, + UNNEST(state_change)->'listing_change_id' AS listing_change_id + ), continuation_records AS ( + -- CONTINUATION + SELECT hstore(o.*) AS listing_state_change + FROM listing_state o + WHERE NOT EXISTS ( + SELECT TRUE FROM listing_changes d + WHERE ( + (o.change_type_name NOT IN ('DELETION', 'RESERVATION_WITHDRAWAL', 'UNSUPPRESSION') AND d.change_type_name = 'DELETION') + OR + (o.change_type_name = 'RESERVATION' AND d.change_type_name = 'RESERVATION_WITHDRAWAL') + ) +
AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + ) + ), deletion_records AS ( + -- DELETION + SELECT hstore(deletions.*) AS listing_state_change + FROM ( + SELECT + 'DELETION' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE o.change_type_name = 'ADDITION' + AND NOT EXISTS ( + SELECT TRUE + FROM listing_changes d + WHERE o.change_type_name = d.change_type_name + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + ) + ) deletions + ), reservation_withdrawal_records AS ( + -- RESERVATION_WITHDRAWAL + SELECT hstore(reservation_withdrawals.*) AS listing_state_change + FROM ( + SELECT + 'RESERVATION_WITHDRAWAL' AS change_type_name, + o.rank_distance AS rank_distance, + o.species_listing_id AS species_listing_id, + o.listing_change_id AS listing_change_id + FROM listing_state o + WHERE o.change_type_name = 'RESERVATION' + AND NOT EXISTS ( + SELECT TRUE + FROM listing_changes d + WHERE o.change_type_name = d.change_type_name + AND o.rank_distance = d.rank_distance + AND o.species_listing_id = d.species_listing_id + ) + ) reservation_withdrawals + ), addition_and_reservation_records AS ( + -- ADDITION, RESERVATION + SELECT hstore(d.*) AS listing_state_change + FROM listing_changes d + WHERE d.change_type_name IN ('ADDITION', 'RESERVATION') + ), listing_state_distance AS ( + SELECT + MIN(listing_state.rank_distance::INTEGER) AS rank_distance -- hstore values are text; cast so MIN is numeric, not lexicographic + FROM listing_state + WHERE change_type_name IN ('ADDITION', 'RESERVATION') + ), listing_changes_distance AS ( + SELECT + MIN((rd.listing_state_change->'rank_distance')::INTEGER) AS rank_distance + FROM ( + SELECT listing_state_change + FROM addition_and_reservation_records + UNION + SELECT listing_state_change + FROM continuation_records + ) rd + ) + SELECT listing_state_change FROM deletion_records + UNION + SELECT listing_state_change
FROM reservation_withdrawal_records + UNION + SELECT listing_state_change FROM continuation_records + UNION + SELECT listing_state_change FROM addition_and_reservation_records + UNION + -- SUPPRESSION + SELECT + r.listing_state_change || hstore( + ARRAY[['change_type_name', 'SUPPRESSION']] + ) AS listing_state_change + FROM ( + SELECT listing_state_change FROM continuation_records + UNION + SELECT listing_state_change FROM addition_and_reservation_records + ) r + WHERE EXISTS ( + SELECT TRUE + FROM listing_changes_distance lcd + WHERE lcd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER + ) + UNION + -- UNSUPPRESSION + SELECT + r.listing_state_change || hstore( + ARRAY[['change_type_name', 'UNSUPPRESSION']] + ) AS listing_state_change + FROM continuation_records r + WHERE EXISTS ( + SELECT TRUE + FROM listing_state_distance lsd + WHERE lsd.rank_distance::INTEGER < (r.listing_state_change->'rank_distance')::INTEGER + ) AND EXISTS ( + SELECT TRUE + FROM listing_changes_distance lcd + WHERE lcd.rank_distance::INTEGER = (r.listing_state_change->'rank_distance')::INTEGER + ) + ) final_state; +$merge_listing_state_changes$; + + +-- slow on staging - 2-3m for SELECT 2116404 rows +DROP VIEW IF EXISTS stateful_listing_change_groups_dv CASCADE; +CREATE OR REPLACE VIEW stateful_listing_change_groups_dv AS + WITH RECURSIVE stateful_listing_change_groups AS ( + WITH listing_change_groups AS ( + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.effective_at + ) + lc.taxon_party_timeline_id, + lc.designation_id, + lc.interval_events_ids, + lc.party_id, + lc.taxon_concept_id, + lc.effective_at, + array_agg( + hstore(ARRAY[ + ['change_type_name', lc.change_type_name], + ['rank_distance', lc.rank_distance], + ['species_listing_id', lc.species_listing_id], + ['listing_change_id', lc.listing_change_id] + ]::TEXT[][]) + ) OVER ( + PARTITION BY + lc.taxon_party_timeline_id, + lc.effective_at + ) listing_changes, + dense_rank() OVER ( + PARTITION BY + 
lc.taxon_party_timeline_id + ORDER BY + lc.effective_at + )::INTEGER AS change_group_rank, + count(*) OVER ( + PARTITION BY + lc.taxon_party_timeline_id + ORDER BY + lc.effective_at + )::INTEGER AS change_group_max_rank + FROM applicable_inherited_taxon_listing_changes_mv lc + ORDER BY + lc.taxon_party_timeline_id, + lc.effective_at + ) + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.change_group_rank, + lcg.listing_changes, + lcg.listing_changes AS listing_state + FROM listing_change_groups lcg + WHERE lcg.change_group_rank = 1 + UNION + SELECT + lcg.taxon_party_timeline_id, + lcg.designation_id, + lcg.interval_events_ids, + lcg.party_id, + lcg.taxon_concept_id, + lcg.effective_at, + lcg.change_group_rank, + lcg.listing_changes, + merge_listing_state_changes( + prev_lcg.listing_state, + lcg.listing_changes + ) AS listing_state + FROM listing_change_groups lcg + JOIN stateful_listing_change_groups prev_lcg + ON lcg.taxon_party_timeline_id = prev_lcg.taxon_party_timeline_id + AND lcg.change_group_rank = prev_lcg.change_group_rank + 1 + ) SELECT * FROM stateful_listing_change_groups +; + +CREATE MATERIALIZED VIEW stateful_listing_change_groups_mv + AS SELECT * FROM stateful_listing_change_groups_dv +; + +CREATE INDEX ON stateful_listing_change_groups_mv ( + taxon_concept_id, designation_id, party_id, effective_at +); + +CREATE INDEX ON stateful_listing_change_groups_mv ( + taxon_party_timeline_id, effective_at +); + +-- 1-2m on staging +DROP VIEW IF EXISTS complete_listing_changes_dv CASCADE; +CREATE OR REPLACE VIEW complete_listing_changes_dv AS +WITH to_list AS ( + -- ADDITION, RESERVATION + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 
lc.change_type_name + FROM applicable_inherited_taxon_listing_changes_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at = lc.effective_at + AND lc.change_type_name IN ('ADDITION', 'RESERVATION') + UNION ALL + -- DELETION + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'DELETION' AS change_type_name + FROM applicable_inherited_taxon_listing_changes_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at > lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'species_listing_id' = lc.species_listing_id::TEXT + AND group_listing_state->'rank_distance' = lc.rank_distance::TEXT + AND group_listing_state->'change_type_name' = 'DELETION' + ) + UNION ALL + -- SUPPRESSION + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'SUPPRESSION' AS change_type_name + FROM applicable_inherited_taxon_listing_changes_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at >= lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'listing_change_id' = lc.listing_change_id::TEXT + AND group_listing_state->'change_type_name' = 'SUPPRESSION' + ) + UNION ALL + SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id + ) + lc.taxon_party_timeline_id, + lc.listing_change_id, + 
min(lcg.effective_at) OVER( + PARTITION BY + lc.taxon_party_timeline_id, + lc.listing_change_id + ) AS effective_at, + 'UNSUPPRESSION' AS change_type_name + FROM applicable_inherited_taxon_listing_changes_mv lc + JOIN stateful_listing_change_groups_mv lcg + ON lcg.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lcg.effective_at > lc.effective_at + AND EXISTS ( + SELECT TRUE + FROM unnest(lcg.listing_state) AS group_listing_state + WHERE group_listing_state->'listing_change_id' = lc.listing_change_id::TEXT + AND group_listing_state->'change_type_name' = 'UNSUPPRESSION' + ) + WHERE lc.change_type_name IN ('ADDITION', 'RESERVATION') +) +SELECT + DISTINCT ON ( + lc.taxon_party_timeline_id, + lc.listing_change_id, + lc.species_listing_id, + lx.effective_at, + lx.change_type_name + ) + lc.taxon_concept_id, + lc.listing_change_id, + lc.designation_id, + lc.interval_events_ids, + lc.species_listing_id, + lc.inclusion_taxon_concept_id, + lc.original_taxon_concept_id, + lc.original_listing_change_id, + lc.rank_distance, + lx.change_type_name, + lc.event_id, + lx.effective_at, + lc.is_current, + lc.excluded_taxon_concept_ids, + lc.party_id, + lc.listed_geo_entities_ids, + lc.excluded_geo_entities_ids, + lc.taxon_party_timeline_id, + lc.timeline_position, + lc.is_geographically_excluded, + lc.is_taxonomically_excluded +FROM + applicable_inherited_taxon_listing_changes_mv lc +JOIN to_list lx + ON lx.taxon_party_timeline_id = lc.taxon_party_timeline_id + AND lx.listing_change_id = lc.listing_change_id +ORDER BY + lc.taxon_party_timeline_id, + lc.listing_change_id, + lx.effective_at +; + +CREATE MATERIALIZED VIEW complete_listing_changes_mv + AS SELECT * FROM complete_listing_changes_dv +; + +CREATE INDEX ON complete_listing_changes_mv ( + taxon_concept_id, designation_id, party_id, effective_at +); + +CREATE INDEX ON complete_listing_changes_mv ( + taxon_party_timeline_id, effective_at +); + + +SELECT + change_type_name, min(taxon_concept_id), count(*) +FROM 
complete_listing_changes_mv +WHERE designation_id = 1 +group by 1; + +-- change_type_name | min | count +-- ------------------+-----+-------- +-- RESERVATION | 136 | 64610 +-- ADDITION | 50 | 590385 +-- SUPPRESSION | 186 | 14598 +-- UNSUPPRESSION | 788 | 305 +-- DELETION | 186 | 26388 + +-- SELECT +-- change_type_name, min(taxon_concept_id), count(*) +-- FROM cites_listing_changes_mview +-- group by 1; +-- +-- change_type_name | min | count +-- ------------------------+-----+-------- +-- ADDITION | 50 | 395506 +-- DELETION | 186 | 3645 +-- EXCEPTION | 395 | 2364 +-- RESERVATION | 136 | 42609 +-- RESERVATION_WITHDRAWAL | 136 | 40981 +-- (5 rows) + +-- SELECT * FROM complete_listing_changes_mv WHERE designation_id = 1 AND taxon_concept_id = ( +-- SELECT taxon_concept_id FROM complete_listing_changes_mv WHERE designation_id = 1 AND change_type_name = 'UNSUPPRESSION' +-- ); + + +-- select * from applicable_inherited_taxon_listing_changes_mv lc where taxon_concept_id = 6353 and designation_id = 1; diff --git a/spec/services/species/trade_name_prefix_matcher_spec.rb b/spec/services/species/trade_name_prefix_matcher_spec.rb index e41ccd1c2..ccbdefe2c 100644 --- a/spec/services/species/trade_name_prefix_matcher_spec.rb +++ b/spec/services/species/trade_name_prefix_matcher_spec.rb @@ -4,10 +4,12 @@ @accepted_name = create_cites_eu_genus( taxon_name: create(:taxon_name, scientific_name: 'Pavona') ) + @trade_name = create_cites_eu_species( taxon_name: create(:taxon_name, scientific_name: 'Pavona minor'), name_status: 'T' ) + @status_N_species = create_cites_eu_species( taxon_name: create(:taxon_name, scientific_name: 'Paradisaea'), parent: create_cites_eu_genus( @@ -15,14 +17,18 @@ ), name_status: 'N' ) + create( :taxon_relationship, taxon_concept: @accepted_name, other_taxon_concept: @trade_name, taxon_relationship_type: trade_name_relationship_type ) + create_cites_I_addition(taxon_concept: @accepted_name) + SapiModule::StoredProcedures.rebuild_cites_taxonomy_and_listings + 
@accepted_name_ac = MAutoCompleteTaxonConcept.find(@accepted_name.id) @trade_name_ac = MAutoCompleteTaxonConcept.find(@trade_name.id) @status_N_species_ac = MAutoCompleteTaxonConcept.find(@status_N_species.id) @@ -42,9 +48,9 @@ end specify { expect(subject.results).to include(@status_N_species_ac) } - end + end - context 'when trade internal visibility' do + context 'when trade internal visibility' do subject do Species::TaxonConceptPrefixMatcher.new( { @@ -56,9 +62,9 @@ end specify { expect(subject.results).to include(@status_N_species_ac) } - end + end - context 'when speciesplus visibility' do + context 'when speciesplus visibility' do subject do Species::TaxonConceptPrefixMatcher.new( { diff --git a/spec/services/taxon_concept_prefix_matcher_spec.rb b/spec/services/taxon_concept_prefix_matcher_spec.rb index 0d0762b45..2ad678631 100644 --- a/spec/services/taxon_concept_prefix_matcher_spec.rb +++ b/spec/services/taxon_concept_prefix_matcher_spec.rb @@ -8,12 +8,14 @@ taxon_name: create(:taxon_name, scientific_name: 'Aaa') ) end + let!(:taxon_concept2) do create_cites_eu_family( taxon_name: create(:taxon_name, scientific_name: 'Aac'), parent: taxon_concept1 ) end + let!(:taxon_concept3) do create_cites_eu_subfamily( taxon_name: create(:taxon_name, scientific_name: 'Aab'), @@ -32,23 +34,28 @@ parent: taxon_concept3 ) end + let!(:hybrid) do - tmp = create_cites_eu_genus( + hybrid_genus = create_cites_eu_genus( taxon_name: create(:taxon_name, scientific_name: 'Abc'), name_status: 'H' ) + create( :taxon_relationship, taxon_concept: taxon_concept4, - other_taxon_concept: tmp, + other_taxon_concept: hybrid_genus, taxon_relationship_type: hybrid_relationship_type ) - tmp + + hybrid_genus end + context 'when name status not specified' do let(:matcher_params) do SearchParams.new(taxonomy: { id: taxonomy.id }, scientific_name: 'Ab') end + let(:matcher) { TaxonConceptPrefixMatcher.new matcher_params } specify { expect(matcher.taxon_concepts).to include(taxon_concept4) } 
specify { expect(matcher.taxon_concepts).not_to include(hybrid) } @@ -58,7 +65,9 @@ let(:matcher_params) do SearchParams.new(taxonomy: { id: taxonomy.id }, scientific_name: 'Ab', name_status: 'H') end + let(:matcher) { TaxonConceptPrefixMatcher.new matcher_params } + specify { expect(matcher.taxon_concepts).not_to include(taxon_concept4) } specify { expect(matcher.taxon_concepts).to include(hybrid) } end @@ -71,6 +80,7 @@ scientific_name: 'A' ) end + let(:parent_matcher) do TaxonConceptPrefixMatcher.new parent_matcher_params end @@ -88,6 +98,7 @@ scientific_name: 'AAA' ) end + let(:ancestor_matcher) do TaxonConceptPrefixMatcher.new ancestor_matcher_params end @@ -105,6 +116,7 @@ scientific_name: 'AAA' ) end + let(:self_and_ancestor_matcher) do TaxonConceptPrefixMatcher.new self_and_ancestor_matcher_params end @@ -140,6 +152,7 @@ scientific_name: 'A' ) end + let(:descendant_matcher) do TaxonConceptPrefixMatcher.new descendant_matcher_params end