Class: Topical::Clustering::HDBSCANAdapter

Inherits:
Adapter
  • Object
show all
Defined in:
lib/topical/clustering/hdbscan_adapter.rb

Overview

Adapter for ClusterKit’s HDBSCAN implementation

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Adapter

#n_clusters, #n_noise_points

Constructor Details

#initialize(min_cluster_size: 5, min_samples: 3, metric: 'euclidean') ⇒ HDBSCANAdapter

Returns a new instance of HDBSCANAdapter.



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/topical/clustering/hdbscan_adapter.rb', line 9

# Builds the adapter and the ClusterKit HDBSCAN instance it wraps.
#
# The three settings are remembered on the adapter and forwarded,
# unchanged, to ClusterKit's constructor.
#
# @param min_cluster_size [Integer] forwarded as +min_cluster_size:+
# @param min_samples [Integer] forwarded as +min_samples:+
# @param metric [String] forwarded as +metric:+
def initialize(min_cluster_size: 5, min_samples: 3, metric: 'euclidean')
  hdbscan_options = {
    min_cluster_size: min_cluster_size,
    min_samples: min_samples,
    metric: metric
  }

  @min_cluster_size = hdbscan_options[:min_cluster_size]
  @min_samples = hdbscan_options[:min_samples]
  @metric = hdbscan_options[:metric]
  @clusterer = ClusterKit::Clustering::HDBSCAN.new(**hdbscan_options)
end

Instance Attribute Details

#clusterer ⇒ Object (readonly)

Access to underlying ClusterKit object if needed



43
44
45
# File 'lib/topical/clustering/hdbscan_adapter.rb', line 43

# Exposes the wrapped ClusterKit clusterer for callers that need
# functionality this adapter does not surface itself.
#
# @return [Object] whatever is currently stored in @clusterer
def clusterer
  @clusterer
end

Instance Method Details

#fit(embeddings) ⇒ Object



27
28
29
30
# File 'lib/topical/clustering/hdbscan_adapter.rb', line 27

# Fits the underlying clusterer on +embeddings+ and returns the adapter
# so calls can be chained.
#
# Goes through #fit_predict rather than calling the clusterer's own +fit+
# so that +update_stats+ runs on the resulting labels; previously a plain
# +fit+ left those statistics untouched.
# NOTE(review): presumably +update_stats+ feeds the inherited
# #n_clusters / #n_noise_points readers - confirm in Adapter.
#
# @param embeddings [Object] data handed to the clusterer unchanged
# @return [self]
def fit(embeddings)
  fit_predict(embeddings)
  self
end

#fit_predict(embeddings) ⇒ Object



21
22
23
24
25
# File 'lib/topical/clustering/hdbscan_adapter.rb', line 21

# Fits the underlying clusterer on +embeddings+ and returns the labels it
# produced, passing those labels to +update_stats+ before returning them.
#
# @param embeddings [Object] data handed to the clusterer unchanged
# @return [Object] the value returned by the clusterer's +fit_predict+
def fit_predict(embeddings)
  @clusterer.fit_predict(embeddings).tap do |assigned_labels|
    update_stats(assigned_labels)
  end
end

#predict(embeddings) ⇒ Object



32
33
34
35
36
37
38
39
40
# File 'lib/topical/clustering/hdbscan_adapter.rb', line 32

# Assigns labels to +embeddings+ using the clusterer's approximate
# prediction, when the clusterer offers one.
#
# HDBSCAN has no dedicated predict step, so this only works if the wrapped
# object responds to +approximate_predict+.
#
# @param embeddings [Object] data handed to the clusterer unchanged
# @return [Object] the value returned by +approximate_predict+
# @raise [NotImplementedError] when the clusterer lacks +approximate_predict+
#   (a ScriptError subclass, so a bare +rescue+ will not catch it)
def predict(embeddings)
  unless @clusterer.respond_to?(:approximate_predict)
    raise NotImplementedError, "HDBSCAN does not support prediction on new data"
  end

  @clusterer.approximate_predict(embeddings)
end