| Class | TaliaUtil::ImportJobHelper |
| In: |
lib/talia_util/import_job_helper.rb
|
| Parent: | Object |
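Helper methods that will be used during import job runs. The import jobs may use the following environment parameters:
* base_url - Base URL or directory. It is prepended to every path that is not an existing local file; if it is a directory, the job changes into it.
* index - Index file listing the sources to import. Must not be given together with xml.
* xml - File or URL containing the XML data to import. Must not be given together with index.
* importer - Name of the reader class used for the import. Defaults to TaliaCore::ActiveSourceParts::Xml::SourceReader.
* reset_store - If set, the data store is completely reset before the import.
* user, pass - Credentials used for HTTP basic authentication when fetching data.
* callback - Name of a class that is instantiated and may provide before_import and after_import callbacks.
* extension - Suffix appended to each entry read from the index file.
* duplicates - Duplicate-handling option that is passed through to the import.
* trace - If set, the stack trace of each import error is printed as well.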
The import itself consists of calling initialize and then do_import.
| base_url | [R] | |
| callback | [R] | |
| credentials | [R] | |
| duplicates | [R] | |
| importer | [R] | |
| index_data | [R] | |
| message_stream | [R] | |
| progressor | [R] | |
| reset | [R] | |
| trace | [R] | |
| xml_data | [R] |
The message_stream will be used for printing progress messages.
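The procedure of the import is the following:
* On initialization the environment parameters are read, the working directory is changed to base_url if that is a directory, the import data is read and the callback object (if any) is created.
* If reset_store is set, the data store is completely reset.
* The before_import callback is run.
* The data is imported, either from the index file or from the single XML data file.
* Any errors collected during the import are printed.
* The after_import callback is run.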
# File lib/talia_util/import_job_helper.rb, line 65
# Sets up an import job from the process environment.
#
# message_stream:: IO-like object that receives the progress messages
#                  (defaults to STDOUT).
# progressor:: Progress reporter used for the import; it is also handed to
#              the callback object when that object has a progressor= writer.
#
# Reads the environment parameters trace, duplicates, importer, user, pass,
# xml, index, reset_store, base_url and callback. The import data itself is
# loaded through init_data.
def initialize(message_stream = STDOUT, progressor = TaliaUtil::BarProgressor)
  # Rake's --trace switch wins; otherwise fall back to the 'trace' parameter.
  @trace = (defined?(Rake) ? Rake.application.options.trace : false) || ENV['trace']
  @progressor = progressor
  @message_stream = message_stream
  @duplicates = ENV['duplicates'].to_sym if(ENV['duplicates'])
  @importer = ENV['importer'] || 'TaliaCore::ActiveSourceParts::Xml::SourceReader'
  @credentials = { :http_basic_authentication => [ENV['user'], ENV['pass']] } unless(ENV['user'].blank?)
  assit(!(ENV['xml'] && ENV['index']), 'Not both xml and index parameters allowed')
  @reset = ENV['reset_store'].yes?

  @base_url = ENV['base_url'].blank? ? '' : ENV['base_url']
  # A local directory given as base_url becomes the working directory.
  if(base_url && File.directory?(base_url))
    message_stream.puts "Setting directory to #{base_url}"
    FileUtils.cd(base_url)
  end

  init_data

  @callback = ENV['callback'].classify.constantize.new unless(ENV['callback'].blank?)

  message_stream.puts "Registered callback (#{callback.class.name}) - (#{callback.respond_to?(:before_import)}|#{callback.respond_to?(:after_import)})" if(callback)

  # Only callbacks that expose a progressor= writer receive the progressor.
  callback.progressor = progressor if(callback && callback.respond_to?('progressor='))
end
Does the actual importing:
# File lib/talia_util/import_job_helper.rb, line 118
# Performs the import: optionally resets the data store, runs the
# before_import callback, imports either from the index data or from the
# single XML data file, prints any collected errors and finally runs the
# after_import callback.
def do_import
  if reset
    TaliaUtil::Util.full_reset
    puts "Data Store has been completely reset"
  end

  import_errors = []
  run_callback(:before_import)

  if index_data
    import_from_index(import_errors)
  else
    puts "Importing from single data file."
    TaliaCore::ActiveSource.create_from_xml(
      xml_data,
      :progressor => progressor,
      :reader => importer,
      :base_file_uri => @true_root,
      :errors => import_errors,
      :duplicates => duplicates
    )
  end

  unless import_errors.empty?
    puts "WARNING: #{import_errors.size} errors during import:"
    import_errors.each { |error| print_error(error) }
  end

  run_callback(:after_import)
end
This is only used if an index file is given for the import. All "plain" imports go directly to create_from_xml in the ActiveSource class
# File lib/talia_util/import_job_helper.rb, line 157
# Imports all sources listed in the index data.
#
# The index is parsed as XML. If its root element is named 'sigla', the
# entries are taken from the 'siglum' elements, otherwise from the 'url'
# elements. The text of each entry, with the 'extension' environment
# parameter appended, is turned into a URL via make_url_from and read with
# the configured importer class. A failure on one entry is reported on the
# message_stream and does not stop the remaining entries.
#
# errors:: Collection handed to create_multi_from to collect import errors.
def import_from_index(errors)
  doc = Hpricot.XML(index_data)
  hyper_format = (doc.root.name == 'sigla')
  elements = hyper_format ? (doc/:siglum) : (doc/:url)
  puts "Import from Index file, #{elements.size} elements"
  # Read the Attributes from the urls
  source_attributes = []
  my_importer = importer.classify.constantize
  progressor.run_with_progress('Reading w/ index', elements.size) do |prog|
    elements.each do |element|
      url = make_url_from("#{element.inner_text}#{ENV['extension']}")
      begin
        # concat appends in place instead of copying the whole array per entry
        source_attributes.concat(my_importer.sources_from_url(url, credentials))
      rescue StandardError => e
        # StandardError only: an interrupt or exit request must still be able
        # to abort a long-running import.
        message_stream.puts "Problem importing #{url} (#{e.message})"
        message_stream.puts e.backtrace
      end
      prog.inc
    end
  end
  # Write the data
  TaliaCore::ActiveSource.progressor = progressor
  TaliaCore::ActiveSource.create_multi_from(source_attributes, :errors => errors, :duplicates => duplicates)
end
Reads the data for the coming import. If the ‘index’ parameter is found in the environment, it is used as the file name of the index file, which is read into the object. Otherwise, if the ‘xml’ environment variable is set, that file is read and used as the XML data for the import; if neither is set, the XML data is read from standard input.
# File lib/talia_util/import_job_helper.rb, line 94
# Loads the data for the import into the object.
#
# With the 'index' environment parameter set, the index file is read into
# @index_data. Otherwise the XML data is read into @xml_data: from the
# location named by the 'xml' parameter if present, else from STDIN.
# For an 'xml' location, @true_root is set to base_for of the resolved URL.
def init_data
  if(ENV['index'].blank?)
    @xml_data = if(ENV['xml'].blank?)
      STDIN.read
    else
      # Same resolution rule as for index entries: an existing local file is
      # used as-is, anything else is prefixed with base_url.
      xml_url = make_url_from(ENV['xml'])
      @true_root = base_for(xml_url)
      open_generic(xml_url, credentials) { |io| io.read }
    end
  else
    index = make_url_from(ENV['index'])
    @index_data = open_generic(index, credentials) { |io| io.read }
  end
end
# File lib/talia_util/import_job_helper.rb, line 183
# Resolves +url+ for reading: a path that exists as a local file is returned
# unchanged, anything else is prefixed with base_url.
def make_url_from(url)
  File.exist?(url) ? url : "#{base_url}#{url}"
end
Prints the message and, if the "trace" option is set, also the stack trace of the Exception e
# File lib/talia_util/import_job_helper.rb, line 140
# Prints the message of the exception +e+; the backtrace follows only when
# the trace option is set.
def print_error(e)
  puts e.message
  return unless trace

  puts e.backtrace
end