class Taxonifi::Export::SpeciesFile

Dumps tables identical to the existing structure in SpeciesFile. Will only work in the pre Identity world. Will reconfigure as templates for Jim’s work after the fact.

Constants

MANIFEST

MANIFEST order is important

SPECIES_FILE_RANKS

tblRanks 5/17/2012

Attributes

author_index[RW]
authorized_user_id[RW]
genus_names[RW]
name_collection[RW]
nomenclator[RW]
ref_collection[RW]
species_names[RW]
time[RW]

Public Class Methods

new(options = {}) click to toggle source
# File lib/export/format/species_file.rb, line 64
# Build a SpeciesFile exporter.
#
# +options+ is merged over the defaults below and the result is handed to super.
#   :nc                  the Taxonifi::Model::NameCollection to export (default: a new, empty one)
#   :export_folder       defaults to 'species_file'
#   :authorized_user_id  required; stored and later written to the ModifiedBy columns
#
# Raises Taxonifi::Export::ExportError when :nc is not a NameCollection or
# when :authorized_user_id is nil.
def initialize(options = {})
  opts = {
    :nc => Taxonifi::Model::NameCollection.new,
    :export_folder => 'species_file',
    :authorized_user_id => nil
  }.merge!(options)

  super(opts)

  # `! x.class == Y` parses as `(!x.class) == Y`, which is always false, so the
  # previous form of this guard could never raise. Check the type directly.
  raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.' unless opts[:nc].is_a?(Taxonifi::Model::NameCollection)
  raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.' if opts[:authorized_user_id].nil?

  @name_collection = opts[:nc]
  @authorized_user_id = opts[:authorized_user_id]
  @author_index = {}

  #
  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
  # because it's all internal.  There is a strong potential for key collisions if this pipeline is modified to
  # include references external to the initialized name_collection.  See also export_references.
  #
  @by_author_reference_index = {}
  @genus_names = {}
  @species_names = {}
  @nomenclator = {}
  @time = Time.now.strftime("%F %T") # one timestamp shared by every LastUpdate column
end

Public Instance Methods

build_author_index() click to toggle source

Assumes names that are the same are the same person.

# File lib/export/format/species_file.rb, line 112
# Rebuild @author_index as a hash of compact_string => author for every
# unique author of the name collection's ref_collection. Authors sharing a
# compact_string collapse to one entry (the last one seen wins).
# Returns the new index.
def build_author_index
  index = {}
  @name_collection.ref_collection.unique_authors.each do |author|
    index.merge!(author.compact_string => author)
  end
  @author_index = index
end
csv_for_genus_and_species_names_tables(type) click to toggle source
# File lib/export/format/species_file.rb, line 290
# Shared generator for tblGenusNames / tblSpeciesNames.
#
# +type+ is 'Genus' or 'Species'; it selects both the ID column name
# ("#{type}NameID") and the name hash to read ("#{type.downcase}_names").
# As a side effect every name in that hash is assigned its 1-based row id,
# which other table generators look up afterwards.
# Returns the CSV string (also stored in @csv_string).
def csv_for_genus_and_species_names_tables(type)
  id_column = "#{type}NameID"
  @headers = [id_column, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
  @csv_string = CSV.generate do |csv|
    csv << @headers
    registry = send("#{type.downcase}_names")
    # Iterate over a snapshot of the keys because the hash values are updated in the loop.
    registry.keys.each.with_index(1) do |name, row_id|
      registry[name] = row_id
      row = {
        id_column.to_sym => row_id,
        Name: name,
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id,
        Italicize: 1 # always true for these data
      }
      csv << @headers.map { |header| row[header.to_sym] }
    end
  end
  @csv_string
end
export() click to toggle source
# File lib/export/format/species_file.rb, line 116
# Run the full name-centric export: call super, generate the reference
# collection, give authors unique ids, build the lookup indices, then write
# one file per MANIFEST entry (each entry names the method that produces it).
def export
  super
  @name_collection.generate_ref_collection(1)

  # Give authors unique ids
  @name_collection.ref_collection.uniquify_authors(1)
  build_author_index

  # See notes in #initialize re potential key collisions!
  @by_author_reference_index = @name_collection.ref_collection.collection.each_with_object({}) do |ref, index|
    index.merge!(ref.author_year_index => ref)
  end

  # Register every name with a nil id; genus and subgenus share one hash, as do
  # species and subspecies.
  [
    ['genus',      @genus_names],
    ['subgenus',   @genus_names],
    ['species',    @species_names],
    ['subspecies', @species_names]
  ].each do |rank, registry|
    @name_collection.names_at_rank(rank).each do |taxon_name|
      registry.merge!(taxon_name.name => nil)
    end
  end

  MANIFEST.each do |table|
    write_file(table, send(table))
  end
end
export_references(options = {}) click to toggle source

Export only the ref_collection. Sidesteps the main name-centric exports. Note that this still uses the base @name_collection object as a starting reference; it just references @name_collection.ref_collection. So you can do:

nc = Taxonifi::Model::NameCollection.new
nc.ref_collection = Taxonifi::Model::RefCollection.new
etc.
# File lib/export/format/species_file.rb, line 96
# Export only the reference tables (tblPeople, tblRefs, tblRefAuthors and
# sqlRefs), bypassing the name-centric tables written by #export.
#
# +options+ is accepted for interface compatibility but is currently ignored.
# An unused defaults hash (:starting_ref_id => 0, :starting_author_id => 0)
# was previously built here without ever being merged with +options+ or read;
# that dead local has been removed.
# TODO: honour :starting_ref_id / :starting_author_id if id offsets are needed.
def export_references(options = {})
  configure_folders
  build_author_index

  # order matters
  %w{tblPeople tblRefs tblRefAuthors sqlRefs}.each do |table|
    write_file(table, send(table))
  end
end
sqlRefs() click to toggle source

TODO make a standard transaction wrapper

# File lib/export/format/species_file.rb, line 185
# Build a SQL script with one INSERT INTO tblRefs statement per reference,
# wrapped in BEGIN TRY / BEGIN TRANSACTION ... COMMIT / CATCH / ROLLBACK lines.
# Columns are emitted in sorted header order; every value is stringified,
# single-quoted, and has embedded single quotes doubled.
# Returns the script as a newline-joined string.
#
# TODO make a standard transaction wrapper
def sqlRefs
  @headers = %w{RefID ActualYear Title PubID  Verbatim}
  columns = @headers.sort
  column_list = columns.join(",")

  inserts = @name_collection.ref_collection.collection.map do |ref|
    values = {
      RefID: ref.id,
      Title: (ref.title.nil? ? "" : ref.title),
      PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
      ActualYear: ref.year,
      Verbatim: ref.full_citation
    }
    quoted = columns.map { |column| "'#{values[column.to_sym].to_s.gsub(/'/, "''")}'" }
    "INSERT INTO tblRefs (#{column_list}) VALUES (#{quoted.join(",")});"
  end

  prologue = ['BEGIN TRY', 'BEGIN TRANSACTION']
  epilogue = ['COMMIT', 'END TRY', 'BEGIN CATCH', 'ROLLBACK', 'END CATCH']
  (prologue + inserts + epilogue).join("\n")
end
tblCites() click to toggle source

Generate tblCites string.

# File lib/export/format/species_file.rb, line 250
# Generate the tblCites CSV string: one row per name in the collection whose
# author_year_index has an entry in @by_author_reference_index. Names without
# a matching reference are skipped. NomenclatorID is read from @nomenclator.
# Returns the CSV string (also stored in @csv_string).
def tblCites
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.collection.each do |taxon_name|
      citation = @by_author_reference_index[taxon_name.author_year_index]
      next if citation.nil?

      row = {
        TaxonNameID: taxon_name.id,
        SeqNum: 1,
        RefID: citation.id,
        NomenclatorID: @nomenclator[taxon_name.nomenclator_name],
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id,
        CitePages: "",            # empty string, written by CSV as ""
        NewNameStatus: 0,
        Note: "",
        TypeClarification: 0,     # We might derive more data from this
        CurrentConcept: 1,        # Boolean, right?
        ConceptChange: 0,         # Unspecified
        InfoFlags: 0,
        InfoFlagStatus: 1,        # 1 => needs review
        PolynomialStatus: 0
      }
      csv << @headers.map { |header| row[header.to_sym] }
    end
  end
  @csv_string
end
tblGenusNames() click to toggle source
# File lib/export/format/species_file.rb, line 280
# Generate the tblGenusNames CSV string by delegating to
# csv_for_genus_and_species_names_tables with type 'Genus'.
# The result is stored in @csv_string and returned.
def tblGenusNames
  @csv_string = csv_for_genus_and_species_names_tables('Genus')
end
tblNomenclator() click to toggle source

Must be called after tblGenusNames and tblSpeciesNames.

# File lib/export/format/species_file.rb, line 312
# Generate the tblNomenclator CSV string: one row per name ranked at genus or
# below (by position in Taxonifi::RANKS). Genus/species ids are read from
# @genus_names / @species_names and fall back to 0 when absent, so those hashes
# must already hold their ids. Each row's NomenclatorID is also recorded in
# @nomenclator under the name's nomenclator_name.
# Returns the CSV string (also stored in @csv_string).
def tblNomenclator
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    next_id = 1
    @name_collection.collection.each do |taxon_name|
      # Skip anything ranked above genus.
      next if Taxonifi::RANKS.index(taxon_name.rank) < Taxonifi::RANKS.index('genus')

      row = {
        NomenclatorID: next_id,
        GenusNameID: @genus_names[taxon_name.parent_name_at_rank('genus')] || 0,
        SubgenusNameID: @genus_names[taxon_name.parent_name_at_rank('subgenus')] || 0,
        SpeciesNameID: @species_names[taxon_name.parent_name_at_rank('species')] || 0,
        SubspeciesNameID: @species_names[taxon_name.parent_name_at_rank('subspecies')] || 0,
        InfrasubspeciesNameID: 0,
        InfrasubKind: 0,          # this might be wrong
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id,
        SuitableForGenus: 0,      # Set in SF
        SuitableForSpecies: 0     # Set in SF
      }
      @nomenclator.merge!(taxon_name.nomenclator_name => next_id)
      next_id += 1
      csv << @headers.map { |header| row[header.to_sym] }
    end
  end
  @csv_string
end
tblPeople() click to toggle source

Generate tblPeople string.

# File lib/export/format/species_file.rb, line 203
# Generate the tblPeople CSV string: one row per author in @author_index,
# using the id already assigned to each author.
# Returns the CSV string (also stored in @csv_string).
def tblPeople
  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @author_index.each_value do |author|
      cols = {
        PersonID: author.id,
        FamilyName: author.last_name,
        # The key must match the "GivenNames" header exactly; it was previously
        # spelled GivenName, which left this column empty in every row.
        GivenNames: author.first_name,
        GivenInitials: author.initials_string,
        Suffix: author.suffix,
        Role: 1, # authors
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id
      }
      csv << @headers.map { |header| cols[header.to_sym] }
    end
  end
  @csv_string
end
tblRefAuthors() click to toggle source

Generate tblRefAuthors string.

# File lib/export/format/species_file.rb, line 227
# Generate the tblRefAuthors CSV string: one row per (reference, author) pair.
# Each author is resolved through @author_index by compact_string, so
# PersonID is the id of the indexed author. SeqNum is the author's 1-based
# position within the reference.
# Returns the CSV string (also stored in @csv_string).
def tblRefAuthors
  @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.ref_collection.collection.each do |ref|
      ref.authors.each_with_index do |listed_author, position|
        person = @author_index[listed_author.compact_string]
        row = {
          RefID: ref.id,
          PersonID: person.id,
          SeqNum: position + 1,
          AuthorCount: ref.authors.size,
          LastUpdate: @time,
          ModifiedBy: @authorized_user_id
        }
        csv << @headers.map { |header| row[header.to_sym] }
      end
    end
  end
  @csv_string
end
tblRefs() click to toggle source

Generate a tblRefs string.

# File lib/export/format/species_file.rb, line 166
# Generate the tblRefs string: one tab-separated row per reference in the
# name collection's ref_collection. A nil title is written as an empty string.
# Returns the generated string (also stored in @csv_string).
def tblRefs
  @headers = %w{RefID ActualYear Title PubID  Verbatim}
  @csv_string = CSV.generate(col_sep: "\t") do |csv|
    csv << @headers
    @name_collection.ref_collection.collection.each do |ref|
      title = ref.title.nil? ? "" : ref.title
      row = {
        RefID: ref.id,
        Title: title,
        PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
        ActualYear: ref.year,
        Verbatim: ref.full_citation
      }
      csv << @headers.map { |header| row[header.to_sym] }
    end
  end
  @csv_string
end
tblSpeciesNames() click to toggle source
# File lib/export/format/species_file.rb, line 285
# Generate the tblSpeciesNames CSV string by delegating to
# csv_for_genus_and_species_names_tables with type 'Species'.
# The result is stored in @csv_string and returned.
def tblSpeciesNames
  @csv_string = csv_for_genus_and_species_names_tables('Species')
end
tblTaxa() click to toggle source
# File lib/export/format/species_file.rb, line 137
# Generate the tblTaxa CSV string: one row per name in the collection.
# A name with a related_name is written as a synonym (NameStatus 7,
# StatusFlags 262144) whose AboveID is the related name's id; otherwise
# AboveID is the parent's id. Missing references, parents and original genera
# are written as 0.
# Returns the CSV string (also stored in @csv_string).
def tblTaxa
  @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
  @csv_string = CSV.generate do |csv|
    csv << @headers
    @name_collection.collection.each do |taxon_name|
      citation = @by_author_reference_index[taxon_name.author_year_index]
      senior = taxon_name.related_name
      synonym = !senior.nil?

      # !! SF folks like to pre-populate with zeros
      above_id =
        if synonym
          senior.id
        elsif taxon_name.parent
          taxon_name.parent.id
        else
          0
        end

      # Only names without parens get an original genus.
      # SF must be pre-configured with 0 filler (this restriction needs to go)
      original_genus = taxon_name.parens ? nil : taxon_name.parent_at_rank('genus')

      row = {
        TaxonNameID: taxon_name.id,
        TaxonNameStr: taxon_name.parent_ids_sf_style, # closure -> ends with 1
        RankID: SPECIES_FILE_RANKS[taxon_name.rank],
        Name: taxon_name.name,
        Parens: (taxon_name.parens ? 1 : 0),
        AboveID: above_id,
        RefID: (citation ? citation.id : 0),
        DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
        AccessCode: 0,
        NameStatus: (synonym ? 7 : 0),       # 0: valid, 7: synonym
        StatusFlags: (synonym ? 262144 : 0), # 0: valid, 262144: jr. synonym
        OriginalGenusID: (original_genus ? original_genus.id : 0),
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id
      }
      csv << @headers.map { |header| row[header.to_sym] }
    end
  end
  @csv_string
end