class Taxonifi::Export::SpeciesFile

Dumps tables identical to the existing structure in SpeciesFile. Will only work in the pre-Identity world. Will be reconfigured as templates for Jim’s work after the fact.

Constants

MANIFEST: MANIFEST order is important
SPECIES_FILE_RANKS: tblRanks 5/17/2012

Attributes

author_index[RW]

authorized_user_id[RW]

genus_names[RW]

name_collection[RW]

nomenclator[RW]

ref_collection[RW]

species_names[RW]

time[RW]

Public Class Methods

new(options = {}) click to toggle source

# File lib/export/format/species_file.rb, line 64
# Build a SpeciesFile exporter.
#
# Options (merged over the defaults below, then handed to the superclass):
#   :nc                 - the Taxonifi::Model::NameCollection to export
#                         (defaults to a new, empty collection)
#   :export_folder      - defaults to 'species_file'
#   :authorized_user_id - mandatory; written to every ModifiedBy column
#
# Raises Taxonifi::Export::ExportError when :nc is not a NameCollection or
# when :authorized_user_id is nil.
def initialize(options = {})
  opts = {
    :nc => Taxonifi::Model::NameCollection.new,
    :export_folder => 'species_file',
    :authorized_user_id => nil
  }.merge!(options)

  super(opts)

  # The previous guard, `! opts[:nc].class == NameCollection`, parsed as
  # `(!opts[:nc].class) == NameCollection`, i.e. `false == NameCollection`,
  # so it could never fire. Test the type directly instead.
  unless opts[:nc].is_a?(Taxonifi::Model::NameCollection)
    raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.'
  end
  raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.' if opts[:authorized_user_id].nil?

  @name_collection = opts[:nc]
  @authorized_user_id = opts[:authorized_user_id]
  @author_index = {}

  #
  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
  # because it's all internal.  There is a strong potential for key collisions if this pipeline is modified to
  # include references external to the initialized name_collection.  See also export_references.
  #
  @by_author_reference_index = {}
  @genus_names = {}
  @species_names = {}
  @nomenclator = {}
  @time = Time.now.strftime("%F %T") # one timestamp shared by every LastUpdate column
end

Public Instance Methods

build_author_index() click to toggle source

Assumes names that are the same are the same person.

# File lib/export/format/species_file.rb, line 112
# Rebuild @author_index as a Hash of compact_string => author for every
# unique author in the name collection's reference collection. Authors that
# share a compact string collapse into one entry (the last one seen is kept),
# which is how "same name means same person" is implemented.
# Returns the new index.
def build_author_index
  index = {}
  @name_collection.ref_collection.unique_authors.each do |author|
    index[author.compact_string] = author
  end
  @author_index = index
end

csv_for_genus_and_species_names_tables(type) click to toggle source

# File lib/export/format/species_file.rb, line 290
# Shared generator for the genus and species name tables.
#
# type - 'Genus' or 'Species'; it selects both the ID column ("<type>NameID")
#        and the name hash to read ("<type.downcase>_names").
#
# Every name in that hash is numbered from 1 in key order. The number is
# written back into the hash as the name's ID and emitted as one CSV row.
# Sets @headers and @csv_string; returns the CSV string.
def csv_for_genus_and_species_names_tables(type)
  id_column = "#{type}NameID"
  @headers = [id_column, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
  names = send("#{type.downcase}_names")

  @csv_string = CSV.generate do |csv|
    csv << @headers
    names.keys.each.with_index(1) do |name, id|
      names[name] = id # remember the assigned ID for later lookups
      row = {
        id_column => id,
        "Name" => name,
        "LastUpdate" => @time,
        "ModifiedBy" => @authorized_user_id,
        "Italicize" => 1 # always true for these data
      }
      csv << row.values_at(*@headers)
    end
  end
end

export() click to toggle source

# File lib/export/format/species_file.rb, line 116
# Run the full name-centric export: generate the reference collection from
# the names, give authors unique ids, build the lookup indices, seed the
# genus/species name hashes, then write one file per MANIFEST entry.
def export
  super
  @name_collection.generate_ref_collection(1)

  # Give authors unique ids
  @name_collection.ref_collection.uniquify_authors(1)
  build_author_index

  # See notes in #initialize re potential key collisions!
  reference_index = {}
  @name_collection.ref_collection.collection.each do |reference|
    reference_index[reference.author_year_index] = reference
  end
  @by_author_reference_index = reference_index

  # Genus and subgenus names share one table, as do species and subspecies
  # names. Values start as nil here and are assigned while iterating MANIFEST.
  [
    [@genus_names, %w{genus subgenus}],
    [@species_names, %w{species subspecies}]
  ].each do |bucket, ranks|
    ranks.each do |rank|
      @name_collection.names_at_rank(rank).each { |name| bucket[name.name] = nil }
    end
  end

  # MANIFEST order is important: each entry names both the file to write and
  # the method that produces its content.
  MANIFEST.each { |table| write_file(table, send(table)) }
end

export_references(options = {}) click to toggle source

Export only the ref_collection, sidestepping the main name-centric exports. Note that this still uses the base @name_collection object as its starting point; it simply reads @name_collection.ref_collection. So you can do:

nc = Taxonifi::Model::NameCollection.new
nc.ref_collection = Taxonifi::Model::RefCollection.new
etc.

# File lib/export/format/species_file.rb, line 96
# Export only the reference tables (people, refs, ref-authors and the SQL
# version of the refs), bypassing the name-centric tables.
#
# options - accepted for interface compatibility but currently IGNORED. The
#           previous body built a defaults hash (:starting_ref_id => 0,
#           :starting_author_id => 0) that was never merged with +options+
#           and never read, so that dead local has been removed.
#
# Returns the list of table names that were written.
def export_references(options = {})
  configure_folders
  build_author_index

  # order matters
  %w{tblPeople tblRefs tblRefAuthors sqlRefs}.each do |table|
    write_file(table, send(table))
  end
end

sqlRefs() click to toggle source

TODO make a standard transaction wrapper

# File lib/export/format/species_file.rb, line 185
# Render the reference collection as a T-SQL script: one INSERT INTO tblRefs
# per reference, wrapped in a TRY/CATCH transaction.
# Columns are emitted in alphabetical order. Every value is written as a
# single-quoted string with embedded single quotes doubled.
# Sets @headers; returns the script as one newline-joined String.
# TODO make a standard transaction wrapper
def sqlRefs
  @headers = %w{RefID ActualYear Title PubID  Verbatim}
  columns = @headers.sort
  column_list = columns.join(",")

  inserts = @name_collection.ref_collection.collection.map do |ref|
    row = {
      RefID: ref.id,
      Title: (ref.title.nil? ? '' : ref.title),
      PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
      ActualYear: ref.year,
      Verbatim: ref.full_citation
    }
    values = columns.map { |column| "'#{row[column.to_sym].to_s.gsub("'", "''")}'" }
    "INSERT INTO tblRefs (#{column_list}) VALUES (#{values.join(",")});"
  end

  prologue = ['BEGIN TRY', 'BEGIN TRANSACTION']
  epilogue = ['COMMIT', 'END TRY', 'BEGIN CATCH', 'ROLLBACK', 'END CATCH']
  (prologue + inserts + epilogue).join("\n")
end

tblCites() click to toggle source

Generate tblCites string.

# File lib/export/format/species_file.rb, line 250
# Generate the tblCites CSV string.
# One row is written per name whose author_year_index has an entry in
# @by_author_reference_index; names without a matching reference are skipped.
# NomenclatorID is looked up in @nomenclator by the name's nomenclator_name.
# Sets @headers and @csv_string; returns the CSV string.
def tblCites
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.collection.each do |name|
      reference = @by_author_reference_index[name.author_year_index]
      unless reference.nil?
        row = {
          'TaxonNameID' => name.id,
          'SeqNum' => 1,
          'RefID' => reference.id,
          'NomenclatorID' => @nomenclator[name.nomenclator_name],
          'LastUpdate' => @time,
          'ModifiedBy' => @authorized_user_id,
          'CitePages' => '',         # empty string, written as "" in CSV speak
          'NewNameStatus' => 0,
          'Note' => '',
          'TypeClarification' => 0,  # We might derive more data from this
          'CurrentConcept' => 1,     # Boolean, right?
          'ConceptChange' => 0,      # Unspecified
          'InfoFlags' => 0,
          'InfoFlagStatus' => 1,     # 1 => needs review
          'PolynomialStatus' => 0
        }
        csv << row.values_at(*@headers)
      end
    end
  end
end

tblGenusNames() click to toggle source

# File lib/export/format/species_file.rb, line 280
# Generate the tblGenusNames CSV string by delegating to the shared
# genus/species generator with type 'Genus'. Stores the result in @csv_string
# and returns it.
def tblGenusNames
  @csv_string = csv_for_genus_and_species_names_tables('Genus')
end

tblNomenclator() click to toggle source

must be called post tblGenusNames and tblSpeciesNames

# File lib/export/format/species_file.rb, line 312
# Generate the tblNomenclator CSV string.
# Must be called after tblGenusNames and tblSpeciesNames: the name IDs are
# read from @genus_names / @species_names, which those methods populate.
#
# Names ranked above genus (earlier in Taxonifi::RANKS) are skipped. Every
# other name gets the next sequential NomenclatorID, which is also recorded
# in @nomenclator under the name's nomenclator_name.
# Sets @headers and @csv_string; returns the CSV string.
def tblNomenclator
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
  genus_position = Taxonifi::RANKS.index('genus')
  nomenclator_id = 1

  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.collection.each do |name|
      next if Taxonifi::RANKS.index(name.rank) < genus_position

      row = {
        'NomenclatorID' => nomenclator_id,
        'GenusNameID' => @genus_names[name.parent_name_at_rank('genus')] || 0,
        'SubgenusNameID' => @genus_names[name.parent_name_at_rank('subgenus')] || 0,
        'SpeciesNameID' => @species_names[name.parent_name_at_rank('species')] || 0,
        'SubspeciesNameID' => @species_names[name.parent_name_at_rank('subspecies')] || 0,
        'InfrasubspeciesNameID' => 0,
        'InfrasubKind' => 0,          # this might be wrong
        'LastUpdate' => @time,
        'ModifiedBy' => @authorized_user_id,
        'SuitableForGenus' => 0,      # Set in SF
        'SuitableForSpecies' => 0     # Set in SF
      }
      @nomenclator[name.nomenclator_name] = nomenclator_id
      nomenclator_id += 1
      csv << row.values_at(*@headers)
    end
  end
end

tblPeople() click to toggle source

Generate tblPeople string.

# File lib/export/format/species_file.rb, line 203
# Generate the tblPeople CSV string: one row per author in @author_index,
# using the author's existing id as PersonID.
# Sets @headers and @csv_string; returns the CSV string.
def tblPeople
  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @author_index.each_value do |author|
      row = {
        PersonID: author.id,
        FamilyName: author.last_name,
        # The key must match the "GivenNames" header. It was previously spelled
        # GivenName, so this column was always written empty.
        GivenNames: author.first_name,
        GivenInitials: author.initials_string,
        Suffix: author.suffix,
        Role: 1, # authors
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id
      }
      # fetch (rather than []) makes any future header/key mismatch fail loudly
      # instead of silently producing an empty column.
      csv << @headers.map { |header| row.fetch(header.to_sym) }
    end
  end
end

tblRefAuthors() click to toggle source

Generate tblRefAuthors string.

# File lib/export/format/species_file.rb, line 227
# Generate the tblRefAuthors CSV string: one row per (reference, author)
# pair, in each reference's author order. PersonID comes from the
# @author_index entry matching the author's compact_string, not from the
# author object attached to the reference.
# Sets @headers and @csv_string; returns the CSV string.
def tblRefAuthors
  @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.ref_collection.collection.each do |reference|
      reference.authors.each.with_index(1) do |ref_author, position|
        person = @author_index[ref_author.compact_string]
        row = {
          'RefID' => reference.id,
          'PersonID' => person.id,
          'SeqNum' => position,
          'AuthorCount' => reference.authors.size,
          'LastUpdate' => @time,
          'ModifiedBy' => @authorized_user_id
        }
        csv << row.values_at(*@headers)
      end
    end
  end
end

tblRefs() click to toggle source

Generate a tblRefs string.

# File lib/export/format/species_file.rb, line 166
# Generate the tblRefs string. Unlike the other tables this one is
# tab-separated. One row is written per reference in the reference
# collection; a nil title is written as an empty string.
# Sets @headers and @csv_string; returns the generated string.
def tblRefs
  @headers = %w{RefID ActualYear Title PubID  Verbatim}
  @csv_string = CSV.generate(:col_sep => "\t") do |csv|
    csv << @headers
    @name_collection.ref_collection.collection.each do |reference|
      row = {
        'RefID' => reference.id,
        'Title' => (reference.title.nil? ? '' : reference.title),
        'PubID' => 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
        'ActualYear' => reference.year,
        'Verbatim' => reference.full_citation
      }
      csv << row.values_at(*@headers)
    end
  end
end

tblSpeciesNames() click to toggle source

# File lib/export/format/species_file.rb, line 285
# Generate the tblSpeciesNames CSV string by delegating to the shared
# genus/species generator with type 'Species'. Stores the result in
# @csv_string and returns it.
def tblSpeciesNames
  @csv_string = csv_for_genus_and_species_names_tables('Species')
end

tblTaxa() click to toggle source

# File lib/export/format/species_file.rb, line 137
# Generate the tblTaxa CSV string: one row per name in the name collection.
#
# A name with a related_name is written as a synonym (NameStatus 7,
# StatusFlags 262144) placed under that related name; otherwise it is written
# as valid and placed under its parent. Missing ids are written as 0.
# Sets @headers and @csv_string; returns the CSV string.
def tblTaxa
  @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.collection.each do |name|
      reference = @by_author_reference_index[name.author_year_index]

      row = {}
      row['TaxonNameID'] = name.id
      row['TaxonNameStr'] = name.parent_ids_sf_style # closure -> ends with 1
      row['RankID'] = SPECIES_FILE_RANKS[name.rank]
      row['Name'] = name.name
      row['Parens'] = name.parens ? 1 : 0

      # !! SF folks like to pre-populate with zeros
      row['AboveID'] =
        if name.related_name.nil?
          name.parent ? name.parent.id : 0
        else
          name.related_name.id
        end

      row['RefID'] = reference ? reference.id : 0
      # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
      row['DataFlags'] = 0
      row['AccessCode'] = 0
      row['NameStatus'] = name.related_name.nil? ? 0 : 7        # 0: valid, 7: synonym
      row['StatusFlags'] = name.related_name.nil? ? 0 : 262144  # 0: valid, 262144: jr. synonym

      # SF must be pre-configured with 0 filler (this restriction needs to go)
      row['OriginalGenusID'] =
        if !name.parens && name.parent_at_rank('genus')
          name.parent_at_rank('genus').id
        else
          0
        end

      row['LastUpdate'] = @time
      row['ModifiedBy'] = @authorized_user_id

      csv << row.values_at(*@headers)
    end
  end
end