Class: Cabriolet::Extraction::Extractor

Inherits:

Object

Object
Cabriolet::Extraction::Extractor

show all

Defined in: lib/cabriolet/extraction/extractor.rb

Overview

Unified extractor using Fractor for parallel file extraction. The number of workers selects the mode: 1 = sequential, N = parallel.

Constant Summary collapse

DEFAULT_WORKERS =

Instance Attribute Summary collapse

#archive ⇒ Object readonly

Returns the value of attribute archive.
#output_dir ⇒ Object readonly

Returns the value of attribute output_dir.
#stats ⇒ Object readonly

Returns the value of attribute stats.
#workers ⇒ Object readonly

Returns the value of attribute workers.

Instance Method Summary collapse

#extract_all ⇒ Hash

Extract all files from archive.
#extract_with_progress {|current, total, file| ... } ⇒ Hash

Extract files with progress callback.
#initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options) ⇒ Extractor constructor

A new instance of Extractor.

Constructor Details

#initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options) ⇒ `Extractor`

Returns a new instance of Extractor.

# File 'lib/cabriolet/extraction/extractor.rb', line 16

# Set up an extractor for +archive+ that writes into +output_dir+.
#
# @param archive [Object] archive whose +files+ will be extracted
# @param output_dir [String] directory the extract methods create and write into
# @param workers [Integer] requested worker count; values below 1 are raised to 1
# @param options [Hash] optional settings
# @option options [Boolean] :preserve_paths (true) forwarded to each work item
# @option options [Boolean] :overwrite (false) forwarded to each work item
def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
  @archive, @output_dir = archive, output_dir
  # Never run with fewer than one worker.
  @workers = [workers, 1].max
  @preserve_paths = options.fetch(:preserve_paths) { true }
  @overwrite = options.fetch(:overwrite) { false }
  @stats = { extracted: 0, skipped: 0, failed: 0, bytes: 0 }
end

Instance Attribute Details

#archive ⇒ `Object` (readonly)

Returns the value of attribute archive.



14
15
16

# File 'lib/cabriolet/extraction/extractor.rb', line 14

# The archive this extractor reads from, as given to the constructor.
#
# @return [Object]
def archive = @archive

#output_dir ⇒ `Object` (readonly)

Returns the value of attribute output_dir.



14
15
16

# File 'lib/cabriolet/extraction/extractor.rb', line 14

# The destination directory given to the constructor.
#
# @return [Object]
def output_dir = @output_dir

#stats ⇒ `Object` (readonly)

Returns the value of attribute stats.



14
15
16

# File 'lib/cabriolet/extraction/extractor.rb', line 14

# The extraction counters; the constructor initialises them as a Hash with
# +:extracted+, +:skipped+, +:failed+ and +:bytes+ all set to 0.
#
# @return [Object]
def stats = @stats

#workers ⇒ `Object` (readonly)

Returns the value of attribute workers.



14
15
16

# File 'lib/cabriolet/extraction/extractor.rb', line 14

# The worker count in effect; the constructor stores at least 1 here.
#
# @return [Object]
def workers = @workers

Instance Method Details

#extract_all ⇒ `Hash`

Extract all files from archive

Returns:

(Hash) —

Extraction statistics

# File 'lib/cabriolet/extraction/extractor.rb', line 28

# Extract every file of the archive in a single supervisor run.
#
# Creates the output directory, wraps each archive file in a
# FileExtractionWork, hands the work to one Fractor supervisor configured
# with +@workers+ FileExtractionWorker instances, and passes the
# supervisor's results to +collect_stats+ once the run has finished.
#
# @return [Hash] Extraction statistics
def extract_all
  FileUtils.mkdir_p(@output_dir)

  # Every work item shares the same extraction settings.
  work_options = {
    output_dir: @output_dir,
    preserve_paths: @preserve_paths,
    overwrite: @overwrite,
  }
  jobs = @archive.files.map { |entry| FileExtractionWork.new(entry, **work_options) }

  # A single pool of extraction workers handles all jobs.
  pool = { worker_class: FileExtractionWorker, num_workers: @workers }
  runner = Fractor::Supervisor.new(worker_pools: [pool])
  runner.add_work_items(jobs)
  runner.run

  collect_stats(runner.results)
  @stats
end

#extract_with_progress {|current, total, file| ... } ⇒ `Hash`

Extract files with progress callback

Yields:

(current, total, file) —

Progress callback

Returns:

(Hash) —

Extraction statistics

# File 'lib/cabriolet/extraction/extractor.rb', line 67

# Extract all files, reporting progress through the given block.
#
# Without a block this simply delegates to #extract_all. With exactly one
# worker, files are extracted one at a time via +extract_single_file+ and
# the block is called after each file. With more workers, the files are
# split into batches; each batch is run through its own Fractor supervisor
# and the block is then called once per file of that batch, so progress
# advances in batch-sized steps rather than file by file.
#
# @yield [current, total, file] Progress callback
# @yieldparam current [Integer] number of files reported so far
# @yieldparam total [Integer] number of files in the archive
# @yieldparam file [Object] the archive file being reported
# @return [Hash] Extraction statistics
def extract_with_progress(&block)
  return extract_all unless block

  FileUtils.mkdir_p(@output_dir)

  files = @archive.files
  total = files.count
  current = 0

  # Sequential mode uses simple iteration with per-file progress
  if @workers == 1
    files.each do |file|
      extract_single_file(file)
      current += 1
      yield(current, total, file)
    end
    return @stats
  end

  # Parallel mode: Fractor has no per-item callback, so files are processed
  # in batches and progress is reported after each batch completes.
  batch_size = [total / @workers, 1].max

  files.each_slice(batch_size) do |batch|
    work_items = batch.map do |file|
      FileExtractionWork.new(
        file,
        output_dir: @output_dir,
        preserve_paths: @preserve_paths,
        overwrite: @overwrite,
      )
    end

    supervisor = Fractor::Supervisor.new(
      worker_pools: [
        {
          worker_class: FileExtractionWorker,
          num_workers: @workers,
        },
      ],
    )

    supervisor.add_work_items(work_items)
    supervisor.run

    # Record this batch's outcome in @stats, mirroring #extract_all; without
    # this the parallel path never updates the statistics it returns.
    # NOTE(review): assumes collect_stats adds to @stats rather than
    # resetting it, so calling it once per batch accumulates — confirm.
    collect_stats(supervisor.results)

    batch.each do |file|
      current += 1
      yield(current, total, file)
    end
  end

  @stats
end

Class: Cabriolet::Extraction::Extractor

Overview

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options) ⇒ Extractor

Instance Attribute Details

#archive ⇒ Object (readonly)

#output_dir ⇒ Object (readonly)

#stats ⇒ Object (readonly)

#workers ⇒ Object (readonly)