Class: Cabriolet::Streaming::StreamParser
- Inherits:
-
Object
- Object
- Cabriolet::Streaming::StreamParser
- Defined in:
- lib/cabriolet/streaming.rb
Overview
Stream-based archive parser
Constant Summary collapse
- DEFAULT_CHUNK_SIZE =
Default chunk size used when streaming: 64 KB.
65_536
Instance Method Summary collapse
-
#each_file {|file| ... } ⇒ Enumerator
Iterate over files without loading entire archive into memory.
-
#extract_streaming(output_dir, **_options) ⇒ Hash
Extract files using streaming to minimize memory usage.
-
#initialize(path, chunk_size: DEFAULT_CHUNK_SIZE) ⇒ StreamParser
constructor
A new instance of StreamParser.
-
#stream_file_data(file) {|chunk| ... } ⇒ Enumerator
Stream file data in chunks.
Constructor Details
#initialize(path, chunk_size: DEFAULT_CHUNK_SIZE) ⇒ StreamParser
Returns a new instance of StreamParser.
10 11 12 13 14 15 |
# File 'lib/cabriolet/streaming.rb', line 10
#
# Build a parser for the archive at +path+.
#
# Stores the path and chunk size, then asks FormatDetector to identify the
# archive format from the path.
#
# @param path [String] location of the archive
# @param chunk_size [Integer] chunk size kept for later streaming calls
# @raise [UnsupportedFormatError] when FormatDetector.detect returns a falsy value
def initialize(path, chunk_size: DEFAULT_CHUNK_SIZE)
  @path = path
  @chunk_size = chunk_size

  @format = FormatDetector.detect(path)
  return if @format

  raise UnsupportedFormatError, "Unable to detect format"
end
Instance Method Details
#each_file {|file| ... } ⇒ Enumerator
Iterate over files without loading entire archive into memory
30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/cabriolet/streaming.rb', line 30
#
# Iterate over the archive's files, yielding each one to the block.
#
# CAB and CHM archives are handed to their dedicated streaming helpers; any
# other detected format is opened through Cabriolet::Auto and its file list
# is iterated directly.
#
# @yield [file] each file entry of the archive
# @return [Enumerator] when called without a block
def each_file(&block)
  return enum_for(:each_file) unless block_given?

  if @format == :cab
    stream_cab_files(&block)
  elsif @format == :chm
    stream_chm_files(&block)
  else
    # No streaming reader for this format: fall back to standard parsing.
    Cabriolet::Auto.open(@path).files.each(&block)
  end
end
#extract_streaming(output_dir, **_options) ⇒ Hash
Extract files using streaming to minimize memory usage
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/cabriolet/streaming.rb', line 78
#
# Extract every file of the archive into +output_dir+, writing each one
# chunk by chunk via #stream_file_data.
#
# A failure on one entry is reported with +warn+ and counted; extraction then
# continues with the next entry. Entries whose name would resolve to a
# location outside +output_dir+ (for example "../evil") are rejected and
# counted as failed instead of being written.
#
# @param output_dir [String] destination directory (created if missing)
# @param _options [Hash] accepted for interface compatibility; ignored
# @return [Hash] counters under the keys :extracted, :bytes and :failed
def extract_streaming(output_dir, **_options)
  FileUtils.mkdir_p(output_dir)

  # Absolute form of the destination, with a trailing separator so that a
  # sibling such as "out-evil" is not mistaken for a child of "out".
  root = File.expand_path(output_dir)
  root_prefix = root.end_with?(File::SEPARATOR) ? root : "#{root}#{File::SEPARATOR}"

  stats = { extracted: 0, bytes: 0, failed: 0 }

  each_file do |file|
    # Entry names may use backslash separators; normalise to "/".
    output_path = File.join(output_dir, file.name.gsub("\\", "/"))

    # Archive entry names are untrusted input: refuse anything that resolves
    # outside the destination directory.
    unless File.expand_path(output_path).start_with?(root_prefix)
      raise ArgumentError, "entry path escapes the output directory"
    end

    FileUtils.mkdir_p(File.dirname(output_path))

    File.open(output_path, "wb") do |out|
      stream_file_data(file) { |chunk| out.write(chunk) }
    end

    stats[:extracted] += 1
    stats[:bytes] += file.size if file.respond_to?(:size)
  rescue StandardError => e
    stats[:failed] += 1
    warn "Failed to extract #{file.name}: #{e.message}"
  end

  stats
end
#stream_file_data(file) {|chunk| ... } ⇒ Enumerator
Stream file data in chunks
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/cabriolet/streaming.rb', line 56
#
# Stream the data of +file+ in chunks.
#
# When the entry responds to +stream_data+, the work is delegated to it with
# the configured chunk size. Otherwise the whole payload is read through
# +file.data+ and yielded as consecutive byte slices of at most @chunk_size
# bytes.
#
# @param file [#stream_data, #data] archive entry to read
# @yield [chunk] each chunk of the entry's data
# @return [Enumerator] when called without a block
# @raise [ArgumentError] in the fallback path when @chunk_size is not a
#   positive Integer (slicing could never advance and would loop forever)
def stream_file_data(file, &block)
  return enum_for(:stream_file_data, file) unless block_given?
  return file.stream_data(chunk_size: @chunk_size, &block) if file.respond_to?(:stream_data)

  # A zero or negative step never advances through the data, so reject it
  # up front instead of spinning forever.
  unless @chunk_size.is_a?(Integer) && @chunk_size.positive?
    raise ArgumentError, "chunk_size must be a positive Integer, got #{@chunk_size.inspect}"
  end

  # Fallback: load the entire payload and yield it slice by slice.
  data = file.data
  (0...data.bytesize).step(@chunk_size) do |offset|
    yield data.byteslice(offset, @chunk_size)
  end
  nil
end