Class: Cabriolet::Extraction::Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/cabriolet/extraction/extractor.rb

Overview

Unified extractor using Fractor for parallel file extraction. A single `workers` setting selects the mode: 1 = sequential, N = parallel.

Constant Summary collapse

DEFAULT_WORKERS =
4

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options) ⇒ Extractor

Returns a new instance of Extractor.



16
17
18
19
20
21
22
23
# File 'lib/cabriolet/extraction/extractor.rb', line 16

# Build an extractor bound to one archive and one destination directory.
#
# @param archive [Object] archive whose files will be extracted
# @param output_dir [String] directory the files are written into
# @param workers [Integer] requested worker count; values below 1 are raised to 1
# @param options [Hash] optional settings
# @option options [Boolean] :preserve_paths (true) stored and handed to each work item
# @option options [Boolean] :overwrite (false) stored and handed to each work item
def initialize(archive, output_dir, workers: DEFAULT_WORKERS, **options)
  # Counters start at zero; one entry per tracked outcome.
  @stats = {
    extracted: 0,
    skipped: 0,
    failed: 0,
    bytes: 0
  }

  # Unrecognised keys in +options+ are ignored; absent ones fall back to defaults.
  @overwrite = options.fetch(:overwrite) { false }
  @preserve_paths = options.fetch(:preserve_paths) { true }

  # Never run with fewer than one worker.
  @workers = [workers, 1].max

  @output_dir = output_dir
  @archive = archive
end

Instance Attribute Details

#archive ⇒ Object (readonly)

Returns the value of attribute archive.



14
15
16
# File 'lib/cabriolet/extraction/extractor.rb', line 14

# Reader for the archive this extractor was constructed with.
#
# @return [Object] the value held in @archive
def archive
  @archive
end

#output_dir ⇒ Object (readonly)

Returns the value of attribute output_dir.



14
15
16
# File 'lib/cabriolet/extraction/extractor.rb', line 14

# Reader for the destination directory this extractor was constructed with.
#
# @return [Object] the value held in @output_dir
def output_dir
  @output_dir
end

#stats ⇒ Object (readonly)

Returns the value of attribute stats.



14
15
16
# File 'lib/cabriolet/extraction/extractor.rb', line 14

# Reader for the extraction counters.
#
# The constructor initialises the Hash with the keys :extracted, :skipped,
# :failed and :bytes, all set to 0. The Hash itself is returned, not a copy.
#
# @return [Hash] the value held in @stats
def stats
  @stats
end

#workers ⇒ Object (readonly)

Returns the value of attribute workers.



14
15
16
# File 'lib/cabriolet/extraction/extractor.rb', line 14

# Reader for the effective worker count.
#
# The constructor stores the larger of the requested count and 1, so the
# value it sets is never below 1.
#
# @return [Integer] the value held in @workers
def workers
  @workers
end

Instance Method Details

#extract_all ⇒ Hash

Extract all files from archive

Returns:

  • (Hash)

    Extraction statistics



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/cabriolet/extraction/extractor.rb', line 28

# Extract every file in the archive through a single Fractor supervisor.
#
# Ensures the output directory exists, wraps each archive file in a
# FileExtractionWork, runs them all on a pool of @workers
# FileExtractionWorker instances, then passes the supervisor's results to
# collect_stats.
#
# @return [Hash] extraction statistics (@stats)
def extract_all
  FileUtils.mkdir_p(@output_dir)

  # Settings shared by every work item.
  shared_settings = {
    output_dir: @output_dir,
    preserve_paths: @preserve_paths,
    overwrite: @overwrite
  }
  jobs = @archive.files.map { |entry| FileExtractionWork.new(entry, **shared_settings) }

  # One pool, sized by the configured worker count.
  pool = { worker_class: FileExtractionWorker, num_workers: @workers }
  runner = Fractor::Supervisor.new(worker_pools: [pool])

  runner.add_work_items(jobs)
  runner.run

  collect_stats(runner.results)
  @stats
end

#extract_with_progress {|current, total, file| ... } ⇒ Hash

Extract files with progress callback

Yields:

  • (current, total, file)

    Progress callback

Returns:

  • (Hash)

    Extraction statistics



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/cabriolet/extraction/extractor.rb', line 67

# Extract every file in the archive, reporting progress through the block.
#
# Without a block this delegates to #extract_all. With exactly one worker the
# files are extracted one at a time through extract_single_file and the block
# fires after each file. With more workers the files are split into batches;
# each batch runs on its own Fractor supervisor, and the block fires for each
# file of a batch only after that whole batch has finished, so parallel
# progress advances in batch-sized steps.
#
# @yield [current, total, file] progress callback
# @yieldparam current [Integer] running count of files reported so far, starting at 1
# @yieldparam total [Integer] number of files in the archive
# @yieldparam file [Object] the archive file being reported
# @return [Hash] extraction statistics (@stats)
def extract_with_progress(&block)
  return extract_all unless block

  FileUtils.mkdir_p(@output_dir)

  files = @archive.files
  total = files.count
  current = 0

  # Sequential mode: plain iteration gives per-file progress.
  if @workers == 1
    files.each do |file|
      extract_single_file(file)
      current += 1
      yield(current, total, file)
    end
    return @stats
  end

  # Parallel mode: Fractor has no built-in progress callback, so the work is
  # cut into roughly @workers batches and progress is reported per batch.
  batch_size = [total / @workers, 1].max

  files.each_slice(batch_size) do |batch|
    work_items = batch.map do |file|
      FileExtractionWork.new(
        file,
        output_dir: @output_dir,
        preserve_paths: @preserve_paths,
        overwrite: @overwrite,
      )
    end

    supervisor = Fractor::Supervisor.new(
      worker_pools: [
        {
          worker_class: FileExtractionWorker,
          num_workers: @workers,
        },
      ],
    )

    supervisor.add_work_items(work_items)
    supervisor.run

    # Fold this batch's results into @stats, mirroring extract_all; without
    # this call the parallel path never passes any results to collect_stats.
    # NOTE(review): assumes collect_stats adds to @stats rather than
    # resetting it on each call - confirm against its definition.
    collect_stats(supervisor.results)

    batch.each do |file|
      current += 1
      yield(current, total, file)
    end
  end

  @stats
end