Class: Topical::Topic

Inherits:
Object
  • Object
show all
Defined in:
lib/topical/topic.rb

Overview

Represents a discovered topic

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id:, document_indices:, documents:, embeddings:, metadata: nil) ⇒ Topic

Returns a new instance of Topic.



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/topical/topic.rb', line 10

# Builds a topic from the documents that were assigned to it.
#
# @param id [Object] identifier for the topic
# @param document_indices [Array] indices of this topic's documents
#   (presumably positions in the source corpus; confirm with the caller)
# @param documents [Array] documents belonging to the topic
# @param embeddings [Array] embeddings for the documents; #representative_docs
#   matches them to documents by position
# @param metadata [Object, nil] optional metadata; an empty array is stored
#   when none is given
def initialize(id:, document_indices:, documents:, embeddings:, metadata: nil)
  @id = id
  @document_indices = document_indices
  @documents = documents
  @embeddings = embeddings
  @metadata = metadata || []
  @terms = []
  @label = nil
  @description = nil
  @coherence = nil # left unset so #coherence computes it on first read
  @distinctiveness = 0.0
end

Instance Attribute Details

#coherence ⇒ Object

Compute topic coherence (simple PMI-based score)



48
49
50
# File 'lib/topical/topic.rb', line 48

# Coherence score for this topic. The value is computed by compute_coherence
# the first time it is needed and reused on later calls.
def coherence
  return @coherence if @coherence

  @coherence = compute_coherence
end

#description ⇒ Object

Returns the value of attribute description.



7
8
9
# File 'lib/topical/topic.rb', line 7

# Description text for this topic. The constructor leaves it nil; .from_h
# assigns it from the serialized hash.
def description
  @description
end

#distinctiveness ⇒ Object

Returns the value of attribute distinctiveness.



7
8
9
# File 'lib/topical/topic.rb', line 7

# Distinctiveness score for this topic. The constructor starts it at 0.0, and
# .from_h falls back to 0.0 when the hash carries no value.
def distinctiveness
  @distinctiveness
end

#document_indices ⇒ Object (readonly)

Returns the value of attribute document_indices.



6
7
8
# File 'lib/topical/topic.rb', line 6

# Document indices this topic was constructed with (read-only).
# NOTE(review): presumably positions of the topic's documents in the original
# corpus; confirm with the caller.
def document_indices
  @document_indices
end

#documents ⇒ Object (readonly)

Returns the value of attribute documents.



6
7
8
# File 'lib/topical/topic.rb', line 6

# Documents assigned to this topic (read-only). #size and #representative_docs
# both read from this collection.
def documents
  @documents
end

#embeddings ⇒ Object (readonly)

Returns the value of attribute embeddings.



6
7
8
# File 'lib/topical/topic.rb', line 6

# Embeddings supplied at construction (read-only). #representative_docs uses
# each embedding's position to look up the matching document.
def embeddings
  @embeddings
end

#id ⇒ Object (readonly)

Returns the value of attribute id.



6
7
8
# File 'lib/topical/topic.rb', line 6

# Identifier given to the topic at construction (read-only).
def id
  @id
end

#label ⇒ Object

Returns the value of attribute label.



7
8
9
# File 'lib/topical/topic.rb', line 7

# Label for this topic. The constructor leaves it nil; .from_h restores it from
# the serialized hash.
def label
  @label
end

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



6
7
8
# File 'lib/topical/topic.rb', line 6

# Metadata stored at construction (read-only). Holds an empty array when the
# constructor was not given any.
def metadata
  @metadata
end

#terms ⇒ Object

Returns the value of attribute terms.



7
8
9
# File 'lib/topical/topic.rb', line 7

# Terms associated with this topic. The constructor starts with an empty
# array; .from_h assigns whatever the serialized hash holds.
def terms
  @terms
end

Class Method Details

.from_h(hash) ⇒ Object

Create from hash



67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/topical/topic.rb', line 67

# Rebuilds a topic from a hash such as the one produced by #to_h.
#
# Keys may be symbols or strings, so a hash that went through a JSON round
# trip is accepted too. Documents and embeddings are not part of the
# serialized form, so the topic starts with empty collections for both.
#
# @param hash [Hash] serialized topic attributes
# @return [Topic] the reconstructed topic
def self.from_h(hash)
  # Prefer the symbol key and fall back to its string form.
  read = lambda do |key|
    value = hash[key]
    value.nil? ? hash[key.to_s] : value
  end

  topic = new(
    id: read.call(:id),
    document_indices: read.call(:document_indices) || [],
    documents: [],  # Would need to be reconstructed
    embeddings: []  # Would need to be reconstructed
  )
  topic.label = read.call(:label)
  topic.description = read.call(:description)
  topic.terms = read.call(:terms) || [] # same default as a freshly built topic
  topic.coherence = read.call(:coherence) || 0.0
  topic.distinctiveness = read.call(:distinctiveness) || 0.0
  topic
end

Instance Method Details

#centroid ⇒ Object

Compute the centroid of the topic



29
30
31
# File 'lib/topical/topic.rb', line 29

# Centroid of the topic. The value is computed by compute_centroid the first
# time it is needed and reused on later calls.
def centroid
  return @centroid if @centroid

  @centroid = compute_centroid
end

#representative_docs(k: 3) ⇒ Array<String>

Get the most representative documents

Parameters:

  • k (Integer) (defaults to: 3)

    Number of documents to return

Returns:

  • (Array<String>)

    Representative documents



36
37
38
39
40
41
42
43
44
45
# File 'lib/topical/topic.rb', line 36

# Picks the documents whose embeddings lie nearest the topic centroid.
#
# @param k [Integer] how many documents to return
# @return [Array<String>] every document when the topic holds k or fewer,
#   otherwise the k nearest ones, closest first
def representative_docs(k: 3)
  return @documents if @documents.length <= k

  # Pair each embedding's distance with its position so the position survives sorting.
  ranked = @embeddings.each_with_index.map do |embedding, position|
    [distance_to_centroid(embedding), position]
  end
  nearest = ranked.sort_by { |distance, _position| distance }.first(k)

  nearest.map { |_distance, position| @documents[position] }
end

#size ⇒ Object

Number of documents in this topic



24
25
26
# File 'lib/topical/topic.rb', line 24

# Number of documents in this topic, taken from the length of the documents
# collection. A topic rebuilt by .from_h starts with no documents, so its size
# is 0 until they are reconstructed.
def size
  @documents.length
end

#to_h ⇒ Object

Convert to hash for serialization



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/topical/topic.rb', line 53

# Serializable snapshot of the topic. Documents and embeddings are left out;
# only their count (size) and the document indices are included. Coherence is
# emitted as currently stored, so it is nil if it has not been computed yet.
#
# @return [Hash] attributes keyed by symbol
def to_h
  summary = { id: @id, label: @label, description: @description, size: size }
  scores = { terms: @terms, coherence: @coherence, distinctiveness: @distinctiveness }

  # Merge in this order so the keys come out in the same sequence as before.
  summary.merge(scores).merge(document_indices: @document_indices)
end