Class: MARC::Reader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/marc/reader.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ Reader

The constructor, to which you may pass either a path:

reader = MARC::Reader.new('marc.dat')

or, if it’s more convenient, a File object:

fh = File.new('marc.dat')
reader = MARC::Reader.new(fh)

or really any object that responds to read(n)

# marc is a string with a bunch of records in it
reader = MARC::Reader.new(StringIO.new(marc))

If your data have non-standard control fields in them (e.g., Aleph’s ‘FMT’) you need to add them specifically to the MARC::ControlField.control_tags Set object

MARC::ControlField.control_tags << 'FMT'


26
27
28
29
30
31
32
33
34
# File 'lib/marc/reader.rb', line 26

def initialize(file)
  if file.is_a?(String)
    @handle = File.new(file)
  elsif file.respond_to?("read", 5)
    @handle = file
  else
    throw "must pass in path or file"
  end
end

Class Method Details

.decode(marc, params = {}) ⇒ Object

A static method for turning raw MARC data in transmission format into a MARC::Record object.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/marc/reader.rb', line 73

# Turns raw MARC data in transmission format into a Record.
#
# @param marc [String] the raw bytes of one record (leader, directory,
#   field data)
# @param params [Hash] decoding options
# @option params [Boolean] :forgiving when truthy, the byte offsets in the
#   directory are ignored and field data is taken in order from the data
#   area split on END_OF_FIELD
# @return [Record] the record with its leader set and one appended field per
#   usable directory entry
# @raise [MARC::Exception] if no directory can be sliced out of +marc+, or,
#   in strict mode, if a directory entry points past the end of +marc+
def self.decode(marc, params={})
  record = Record.new
  record.leader = marc[0, LEADER_LENGTH]

  # where the field data starts
  base_address = record.leader[12..16].to_i

  # get the byte offsets from the record directory
  directory = marc[LEADER_LENGTH..base_address-1]
  raise MARC::Exception.new("invalid directory in record") if directory.nil?

  # the number of fields in the record corresponds to
  # how many directory entries there are
  num_fields = directory.length / DIRECTORY_ENTRY_LENGTH

  # when operating in forgiving mode we just split on end of
  # field instead of using calculated byte offsets from the
  # directory; strict mode never needs this split, so skip it there
  forgiving = params[:forgiving]
  all_fields = forgiving ? (marc[base_address..-1] || '').split(END_OF_FIELD) : nil

  num_fields.times do |field_num|
    # pull exactly one directory entry for the field out
    entry = directory[field_num * DIRECTORY_ENTRY_LENGTH, DIRECTORY_ENTRY_LENGTH]

    # extract the tag
    tag = entry[0..2]

    if forgiving
      # use the next available chunk of field data; once the chunks run
      # out there is nothing left to decode for the remaining entries
      field_data = all_fields.shift
      break if field_data.nil?
    else
      # use the byte offsets in the directory to figure out what
      # field data to extract
      length = entry[3..6].to_i
      offset = entry[7..11].to_i
      field_data = marc[base_address + offset, length]
      if field_data.nil?
        raise MARC::Exception.new("invalid directory entry for tag #{tag}")
      end
    end

    # remove end of field
    field_data = field_data.delete(END_OF_FIELD)

    # control fields carry their data as-is
    if MARC::ControlField.control_tag?(tag)
      record.append(MARC::ControlField.new(tag, field_data))
      next
    end

    # get all subfields
    subfields = field_data.split(SUBFIELD_INDICATOR)

    # must have at least 2 elements (indicators, and 1 subfield)
    # TODO some sort of logging?
    next if subfields.length < 2

    field = MARC::DataField.new(tag)

    # the first chunk holds the two indicators
    indicators = subfields.shift
    field.indicator1 = indicators[0, 1]
    field.indicator2 = indicators[1, 1]

    # every remaining chunk is a one-character code followed by its value
    subfields.each do |data|
      field.append(MARC::Subfield.new(data[0, 1], data[1..-1]))
    end

    # add the field to the record
    record.append(field)
  end

  record
end

Instance Method Details

#each ⇒ Object

to support iteration:

for record in reader
  print record
end

and even searching:

reader.find { |record| record['245'] =~ /Huckleberry/ }


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/marc/reader.rb', line 44

# Reads records from the handle one at a time and yields each decoded record.
# Mixing in Enumerable on top of this method is what enables +find+, +map+, etc.
#
# @yield [record] the value returned by MARC::Reader.decode for each record
# @return [Enumerator, nil] an Enumerator when called without a block,
#   otherwise nil once the handle is exhausted
# @raise [MARC::Exception] if the 5-character record length is not a number
#   of at least 5, or if the handle ends right after a record length
def each
  return enum_for(:each) unless block_given?

  # while there is data left in the file
  while (rec_length_s = @handle.read(5))
    # the length counts its own 5 characters, so anything smaller (including
    # the 0 that to_i gives for non-numeric text) cannot be a record
    rec_length_i = rec_length_s.to_i
    if rec_length_i < 5
      raise MARC::Exception.new("invalid record length: #{rec_length_s}")
    end

    # get the rest of the raw record back from the file using the record
    # length; read returns nil when the handle is already at end of file
    remainder = @handle.read(rec_length_i - 5)
    if remainder.nil?
      raise MARC::Exception.new("unexpected end of data after record length: #{rec_length_s}")
    end
    raw = rec_length_s + remainder

    # Ruby 1.9 will try to set the encoding to ASCII-8BIT, which we don't want.
    # Not entirely sure what happens for MARC-8 encoded records, but, technically,
    # ruby-marc doesn't support MARC-8, anyway.
    raw.force_encoding('utf-8') if raw.respond_to?(:force_encoding)

    # create a record from the data and hand it to the caller
    yield MARC::Reader.decode(raw)
  end
end