diff --git a/lib/bolognese.rb b/lib/bolognese.rb index 0a697b63..32dee151 100644 --- a/lib/bolognese.rb +++ b/lib/bolognese.rb @@ -16,6 +16,7 @@ require 'csl/styles' require 'edtf' +require "bolognese/citeproc_extensions" require "bolognese/version" require "bolognese/metadata" require "bolognese/cli" diff --git a/lib/bolognese/citeproc_extensions.rb b/lib/bolognese/citeproc_extensions.rb new file mode 100644 index 00000000..f75d49cc --- /dev/null +++ b/lib/bolognese/citeproc_extensions.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +# Minimal patch for csl-ruby and citeproc-ruby compatibility +# Root cause: 'contributor' is not recognized as a names variable in citeproc gem +# https://github.com/inukshuk/citeproc/blob/121fa4a950b9bd71960e42d20db96bcea1165201/lib/citeproc/variable.rb#L20-L24 + +module CiteProc + class Variable + # Unfreeze, modify, and refreeze the fields to add 'contributor' and 'accepted-date' + if @fields + # Unfreeze the fields hash temporarily + fields_dup = @fields.dup + + # Add contributor to names (make a new unfrozen array) + fields_dup[:names] = (@fields[:names] + [:contributor]).uniq + + # Add accepted-date to dates (make a new unfrozen array) + fields_dup[:date] = (@fields[:date] + [:'accepted-date']).uniq + + # Rebuild the types mapping - only use actual type keys, not aliases like :all, :any, etc. + types_hash = Hash[*[:date, :names, :number, :text].map { |k| fields_dup[k].map { |n| [n, k] } }.flatten] + + # Update the class instance variables + @fields = fields_dup + @types = Hash.new { |h,k| h.fetch(k.to_sym, nil) }.merge(types_hash).freeze + + # Rebuild @factories from the new @types + # This maps each field name to its Variable subclass (Names, Date, Text, Number) + @factories = Hash.new { |h,k| h.fetch(k.to_s.intern, CiteProc::Variable) }.merge( + Hash[*@types.map { |field_name, type| + [field_name, CiteProc.const_get(type.to_s.capitalize)] + }.flatten] + ).freeze + + # Recreate the aliases + @fields[:name] = @fields[:names] + @fields[:dates] = @fields[:date] + @fields[:numbers] = @fields[:number] + + # Recreate :all and :any + @fields[:all] = @fields[:any] = + [:date, :names, :text, :number].reduce([]) { |s,a| s.concat(@fields[a]) }.sort + + # Refreeze fields + @fields.freeze + end + end +end diff --git a/lib/bolognese/metadata_utils.rb b/lib/bolognese/metadata_utils.rb index 718aa20d..7b2a2add 100644 --- a/lib/bolognese/metadata_utils.rb +++ b/lib/bolognese/metadata_utils.rb @@ -139,19 +139,28 @@ def citeproc_hsh author = to_citeproc(creators) end - if types["resourceTypeGeneral"] == "Software" && version_info.present? - type = "book" + if types["resourceTypeGeneral"] == "Software" + type = "software" else type = types["citeproc"] end + # Filter out contributors who are already creators, editors, or translators to avoid duplication + creator_names = Array.wrap(creators).map { |c| c["name"] || [c["givenName"], c["familyName"]].compact.join(" ") }.compact + unique_contributors = Array.wrap(contributors).reject do |c| + contributor_name = c["name"] || [c["givenName"], c["familyName"]].compact.join(" ") + creator_names.include?(contributor_name) || + c["contributorType"] == "Editor" || + c["contributorType"] == "Translator" + end + { "type" => type, "id" => normalize_doi(doi), "categories" => Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.presence, "language" => language, "author" => author, - "contributor" => to_citeproc(contributors), + "contributor" => unique_contributors.presence ? to_citeproc(unique_contributors) : nil, "editor" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Editor" }) : nil, "translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil, "issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s), diff --git a/spec/fixtures/datacite_xml_csl_with_contributors_and_available.xml b/spec/fixtures/datacite_xml_csl_with_contributors_and_available.xml new file mode 100644 index 00000000..02a862fb --- /dev/null +++ b/spec/fixtures/datacite_xml_csl_with_contributors_and_available.xml @@ -0,0 +1,58 @@ + + + 10.81360/BIFURCATED + + + Toon, Geoffrey C. + Jet Propulsion Laboratory, California Institute of Technology, Pasadena, CA, USA + + + Wunch, Debra + 0000-0002-4924-0377 + California Institute of Technology, Pasadena, CA, U.S.A. + + + + A stand-alone a priori profile generation tool for GGG2014 release + + CaltechDATA + 2015 + + TCCON + + + + Toon, Geoffrey C. + Jet Propulsion Laboratory, California Institute of Technology, Pasadena, CA, USA + + + Wunch, Debra + California Institute of Technology, Pasadena, CA, USA + + + TCCON + + + + 2017-07-24 + 2015-10-14 + + eng + + + 250 + 250 + + + 10.14291/tccon.ggg2014.documentation.R0/1221662 + https://tccon-wiki.caltech.edu/ + + GGG2014.R0 + + TCCON Data Use Policy + + + The Total Carbon Column Observing Network (TCCON) is a network of ground-based Fourier Transform Spectrometers that record direct solar absorption spectra of the atmosphere in the near-infrared. From these spectra, accurate and precise column-averaged abundances of atmospheric constituents including CO2, CH4, N2O, HF, CO, H2O, and HDO, are retrieved. This is a stand-alone a priori profile generation tool for the GGG2014 data release. + <br>Cite this record as:<br>Toon, G. C., &amp; Wunch, D. (2017). A stand-alone a priori profile generation tool for GGG2014 release. CaltechDATA. <a href="https://doi.org/10.14291/tccon.ggg2014.priors.r0/1221661">https://doi.org/10.14291/tccon.ggg2014.priors.r0/1221661</a><br> or choose a <a href="https://crosscite.org/?doi=10.14291/TCCON.GGG2014.PRIORS.R0/1221661"> different citation style.</a><br><a href="https://data.datacite.org/application/x-bibtex/10.14291/TCCON.GGG2014.PRIORS.R0/1221661">Download Citation</a><br> + + \ No newline at end of file diff --git a/spec/writers/citation_writer_spec.rb b/spec/writers/citation_writer_spec.rb index 4195d0d5..9206cd12 100644 --- a/spec/writers/citation_writer_spec.rb +++ b/spec/writers/citation_writer_spec.rb @@ -38,7 +38,7 @@ subject = Bolognese::Metadata.new(input: input, from: "datacite") expect(subject.style).to eq("apa") expect(subject.locale).to eq("en-US") - expect(subject.citation).to eq("Lab for Exosphere and Near Space Environment Studies. (2019). lenses-lab/LYAO_RT-2018JA026426: Original Release (Version 1.0.0). Zenodo. https://doi.org/10.5281/zenodo.2598836") + expect(subject.citation).to eq("Lab for Exosphere and Near Space Environment Studies. (2019). lenses-lab/LYAO_RT-2018JA026426: Original Release (Version 1.0.0) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.2598836") end it "interactive resource without dates" do @@ -112,5 +112,11 @@ expect(subject.locale).to eq("en-US") expect(subject.citation).to eq("M. Fenner, “Eating your own Dog Food,” Understanding the fictional John Smith, vol. 776, no. 1. DataCite, pp. 50–60, Dec. 20, 2016. doi: 10.5438/4k3m-nyvg.") end + + it "with contributors and available date" do + input = fixture_path + "datacite_xml_csl_with_contributors_and_available.xml" + subject = Bolognese::Metadata.new(input: input, from: "datacite") + expect(subject.citation).to eq("Toon, G. C., & Wunch, D. (2015). A stand-alone a priori profile generation tool for GGG2014 release (Version GGG2014.R0) [Computer software]. CaltechDATA. https://doi.org/10.81360/bifurcated") + end end end diff --git a/spec/writers/citeproc_writer_spec.rb b/spec/writers/citeproc_writer_spec.rb index 81ef13bf..562bec6f 100644 --- a/spec/writers/citeproc_writer_spec.rb +++ b/spec/writers/citeproc_writer_spec.rb @@ -105,7 +105,7 @@ input = "https://doi.org/10.6084/m9.figshare.4906367.v1" subject = Bolognese::Metadata.new(input: input, from: "datacite") json = JSON.parse(subject.citeproc) - expect(json["type"]).to eq("article") + expect(json["type"]).to eq("software") expect(json["DOI"]).to eq("10.6084/m9.figshare.4906367.v1") expect(json["title"]).to eq("Scimag catalogue of LibGen as of January 1st, 2014") expect(json["copyright"]).to eq("Creative Commons Zero v1.0 Universal") @@ -115,7 +115,7 @@ input = "https://doi.org/10.5281/zenodo.2598836" subject = Bolognese::Metadata.new(input: input, from: "datacite") json = JSON.parse(subject.citeproc) - expect(json["type"]).to eq("book") + expect(json["type"]).to eq("software") expect(json["DOI"]).to eq("10.5281/zenodo.2598836") expect(json["version"]).to eq("1.0.0") expect(json["copyright"]).to eq("Open Access") @@ -125,7 +125,7 @@ input = fixture_path + "datacite_software_version.json" subject = Bolognese::Metadata.new(input: input, from: "datacite_json") json = JSON.parse(subject.citeproc) - expect(json["type"]).to eq("book") + expect(json["type"]).to eq("software") expect(json["DOI"]).to eq("10.5281/ZENODO.2598836") expect(json["version"]).to eq("1.0.0") expect(json["copyright"]).to eq("Open Access") @@ -241,7 +241,7 @@ input = "https://github.com/datacite/maremma" subject = Bolognese::Metadata.new(input: input, from: "codemeta") json = JSON.parse(subject.citeproc) - expect(json["type"]).to eq("article-journal") + expect(json["type"]).to eq("software") expect(json["id"]).to eq("https://doi.org/10.5438/qeg0-3gm3") expect(json["DOI"]).to eq("10.5438/qeg0-3gm3") expect(json["title"]).to eq("Maremma: a Ruby library for simplified network calls")