# = ArrayIO # Array-like behavior for archival files.

# # == Introduction # Archival files contain many entries following a regular pattern. These files often grow very large, making them # inconvenient or impossible to parse directly into memory. ArrayIO provides an easy way to index these files # so that array-like calls can retrieve entries on the fly. # # Internally ArrayIO keeps an IO object for the archive, and an index of ranges recording where each entry # begins and ends. The index is an array, unless ArrayIO operates in ‘uncached’ mode. In this case the index is # a separate file from which the ranges can be looked up. Uncached mode is useful when dealing with an extremely # large number of ranges (entries) that would chew up lots of memory if kept in an array. # # When operating in a writable mode, entries can be added, inserted, and deleted. All changes are recorded in the index # and will not reflect the actual order of entries in the archive unless consolidated.

# # Example: If you add an entry at index 1000, the index records the range at index 1000, but the actual entry is appended # to the end of the archive. If you delete an entry, it remains in the archive, but the range is removed from the index.

# Consolidation re-writes entries in their proper order and removes deleted entries. # # Notes: # - BE CAREFUL to specify the correct mode when you open up an ArrayIO - as with File, a 'w' mode will overwrite # the ENTIRE archive file. It is safer to use the append mode 'a'. # # Copyright © 2007 Simon Chiang # Version: 0.1 # Licence: MIT-Style # # == Usage # # # Open, autoindex and work with 'archive.txt' # ArrayIO.open('archive.txt', 'r') do |aio| # aio[100] # -> entry 100 # aio[100] = "new entry" # -> reassigns entry 100 # aio.each do |entry| # # do something # end # end # # # Open 'archive.txt' in uncached mode. This creates a file 'archive.index' # # that will be filled with the entry ranges. You can specify where entries # # begin and end using options and a block in reindex. # # If the block returns true, the line is considered the beginning of # # an entry. This block looks for entries delimited by '>' like: # # > entry 0 # # still entry 0 # # > entry 1 # #

# aio = ArrayIO.new('archive.txt', 'ru') # aio.reindex do |line| # line =~ /^>/ # end # aio.close # # # Subclass ArrayIO by overriding str_to_entry, entry_to_str, and reindex # # EntryIO parses entries as above, and functions like: # # entryio[0] # => [0, "\nstill entry 0"] # # entryio[1] # => [1, ""] # # entryio[100] = [100, " is the new entry"] # => writes "> entry 100 is the new entry" # # entryio[100] # => [100, " is the new entry"] # class EntryIO < ArrayIO # def str_to_entry(str) # str =~ /^> entry (\d+)(.*)$/ # [$1, $2] # end # # def entry_to_str(entry) # "> entry #{entry[0]}#{entry[1]}" # end # # def reindex(options={}, &block) # super(options) do |line| # block_given? ? yield(line) : line =~ /^>/ # end # end # end