Class: MARCMapper

Inherits:
Object
  • Object
show all
Defined in:
lib/marc_mapper.rb

Class Method Summary (collapse)

Class Method Details

+ (Object) from_marc_file(marc_file, &blk)

Pass in the path to a MARC file. An optional block can be supplied (e.g. for logging); it is handed each mapped document:

MARCMapper.from_marc_file('/path/to/data.mrc') do |mapped_doc|

  # do something here... logging etc..

end

Returns an array of documents (hashes).



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/marc_mapper.rb', line 18

# Maps every record of a MARC source to a document using a BlockMapper.
#
# Each mapped value that is a String, or each String inside a mapped Array,
# has trailing spaces and ; / . , : characters removed before it is stored.
# Anything else is stored untouched.
#
# @param marc_file [String, Object] a path to a MARC file; any non-String
#   object is handed to MARC::Reader unchanged
# @yield forwarded as-is to BlockMapper#run (usable for logging etc.)
# @return [Object] whatever BlockMapper#run returns -- described by the
#   original author as an array of documents (hashes)
def self.from_marc_file(marc_file, &blk)
  # A local (instead of a class-level @mapper) keeps every call independent.
  mapper = BlockMapper.new

  # add custom methods to each marc record
  mapper.before_each_source_item do |rec, _index|
    rec.extend MARCRecordExt
  end

  # remove ; / . , : and spaces from the end
  cleanup_regexp = /( |;|\/|\.|,|:)+$/

  # Non-String items (e.g. a nil inside an Array) are passed through as-is
  # rather than raising NoMethodError on gsub.
  strip_trailing = lambda do |item|
    item.is_a?(String) ? item.gsub(cleanup_regexp, '') : item
  end

  # after_each_mapped_value gets executed for each mapped item
  # ... after it has been mapped
  mapper.after_each_mapped_value do |_field, value|
    if value.is_a?(Array)
      value.map { |item| strip_trailing.call(item) } # new, cleaned array
    else
      strip_trailing.call(value)
    end
  end

  # record id: control field 001 with spaces and slashes removed
  mapper.map :id do |rec, _index|
    rec['001'].value.delete(' /')
  end

  # titles (text)
  mapper.map :title_t do |rec, _index|
    rec.values_for '245', 'a'
  end

  mapper.map :sub_title_t do |rec, _index|
    rec.values_for '245', 'b'
  end

  mapper.map :alt_titles_t do |rec, _index|
    rec.extract '240:b 700:t 710:t 711:t 440:a 490:a 505:a 830:a'
  end

  mapper.map :title_added_entry_t do |rec, _index|
    rec.values_for '700', 't'
  end

  # title sort
  mapper.map :title_sort do |rec, _index|
    rec.extract '245:a'
  end

  mapper.map :author_t do |rec, _index|
    rec.extract '100:a 110:a 111:a 130:a 700:a 710:a 711:a'
  end

  mapper.map :published_t do |rec, _index|
    rec.extract '260:a'
  end

  mapper.map :isbn_t do |rec, _index|
    rec.isbn # in MARCRecordExt module
  end

  mapper.map :material_type_t do |rec, _index|
    rec.values_for '300', 'a'
  end

  # subject (text)
  mapper.map :subject_t do |rec, _index|
    rec.extract '600:a 610:a 611:a 630:a 650:a 651:a 655:a 690:a'
  end

  # subject (facets)
  mapper.map :subject_era_facet do |rec, _index|
    rec.extract '650:d 650:y 651:y 655:y'
  end

  mapper.map :geographic_subject_facet do |rec, _index|
    rec.extract '650:c 650:z 651:a 651:x 651:z 655:z'
  end

  mapper.map :language_facet do |rec, _index|
    rec.languages # in MARCRecordExt module
  end

  # format fields
  mapper.map :format_facet do |rec, _index|
    rec.format # in MARCRecordExt module
  end

  # downcased format with spaces converted to _
  # (" _" is first collapsed to " " so it yields a single underscore).
  # This can be used for the partial view mapping
  mapper.map :format_code_t do |rec, _index|
    rec.format.to_s.downcase.gsub(/ _/, ' ').gsub(/ /, '_')
  end

  # grab some vernacular for demonstration purposes (e.g. "did you mean")
  mapper.map :vern_t do |rec, _index|
    rec.extract '880:a 880:b 880:c 880:e 880:f 880:p 880:t'
  end

  # _display is stored, but not indexed.
  # Stores the result of rec.to_xml (not a plain display string),
  # presumably so it can be read back into a MARC::Record object.
  mapper.map :marc_display do |rec, _index|
    rec.to_xml
  end

  run_mapping = lambda do |source|
    mapper.run(MARC::Reader.new(source), &blk)
  end

  # Anything that is not a path goes to MARC::Reader untouched; the caller
  # owns that object and remains responsible for closing it.
  return run_mapping.call(marc_file) unless marc_file.is_a?(String)

  # Open the path here so the handle is closed once mapping finishes or raises.
  # NOTE(review): relies on MARC::Reader accepting an open IO and on
  # BlockMapper#run consuming the reader before it returns -- confirm.
  File.open(marc_file) { |io| run_mapping.call(io) }
end