Class: Webhookdb::Replicator::TransistorEpisodeV1

Inherits:
Base
  • Object
show all
Includes:
Appydays::Loggable, TransistorV1Mixin
Defined in:
lib/webhookdb/replicator/transistor_episode_v1.rb

Constant Summary collapse

BLOCK_ELEMENT_TAGS =
["p", "div"].freeze

Constants included from DBAdapter::ColumnTypes

DBAdapter::ColumnTypes::BIGINT, DBAdapter::ColumnTypes::BIGINT_ARRAY, DBAdapter::ColumnTypes::BOOLEAN, DBAdapter::ColumnTypes::COLUMN_TYPES, DBAdapter::ColumnTypes::DATE, DBAdapter::ColumnTypes::DECIMAL, DBAdapter::ColumnTypes::DOUBLE, DBAdapter::ColumnTypes::FLOAT, DBAdapter::ColumnTypes::INTEGER, DBAdapter::ColumnTypes::INTEGER_ARRAY, DBAdapter::ColumnTypes::OBJECT, DBAdapter::ColumnTypes::TEXT, DBAdapter::ColumnTypes::TEXT_ARRAY, DBAdapter::ColumnTypes::TIMESTAMP, DBAdapter::ColumnTypes::UUID

Constants inherited from Base

Base::MAX_INDEX_NAME_LENGTH

Instance Attribute Summary

Attributes inherited from Base

#service_integration

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TransistorV1Mixin

#_remote_key_column, #_resource_and_event, #_timestamp_column_name, #_update_where_expr, #_verify_backfill_401_err_msg, #_verify_backfill_err_msg, #_webhook_response, #calculate_backfill_state_machine

Methods inherited from Base

#_any_subscriptions_to_notify?, #_backfill_state_change_fields, #_backfillers, #_clear_backfill_information, #_clear_webook_information, #_coalesce_excluded_on_update, #_enqueue_backfill_jobs, #_extra_index_specs, #_find_dependency_candidate, #_notify_dependents, #_parallel_backfill, #_publish_rowupsert, #_remote_key_column, #_resource_and_event, #_resource_to_data, #_store_enrichment_body?, #_timestamp_column_name, #_to_json, #_update_where_expr, #_upsert_update_expr, #_upsert_webhook, #_verify_backfill_err_msg, #_webhook_response, #_webhook_state_change_fields, #admin_dataset, #backfill, #backfill_not_supported_message, #calculate_and_backfill_state_machine, #calculate_backfill_state_machine, #calculate_dependency_state_machine_step, #calculate_preferred_create_state_machine, #calculate_webhook_state_machine, chunked_row_update_bounds, #clear_backfill_information, #clear_webhook_information, #create_table, #create_table_modification, #data_column, #dbadapter_table, #denormalized_columns, #descriptor, #dispatch_request_to, #documentation_url, #enqueue_sync_targets, #enrichment_column, #ensure_all_columns, #ensure_all_columns_modification, #find_dependent, #find_dependent!, #indices, #initialize, #on_backfill_error, #on_dependency_webhook_upsert, #preferred_create_state_machine_method, #preprocess_headers_for_logging, #primary_key_column, #process_state_change, #process_webhooks_synchronously?, #qualified_table_sequel_identifier, #readonly_dataset, #remote_key_column, #requires_sequence?, #resource_name_plural, #resource_name_singular, #schema_and_table_symbols, #storable_columns, #synchronous_processing_response_body, #timestamp_column, #upsert_webhook, #upsert_webhook_body, #verify_backfill_credentials, #webhook_endpoint, #webhook_response

Constructor Details

This class inherits a constructor from Webhookdb::Replicator::Base

Class Method Details

.descriptor ⇒ Webhookdb::Replicator::Descriptor



12
13
14
15
16
17
18
19
20
21
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 12

# Build the descriptor for the Transistor episode replicator.
#
# @return [Webhookdb::Replicator::Descriptor] descriptor whose +ctor+ builds a
#   TransistorEpisodeV1 for a given service integration.
def self.descriptor
  build_replicator = ->(sint) { Webhookdb::Replicator::TransistorEpisodeV1.new(sint) }
  return Webhookdb::Replicator::Descriptor.new(
    name: "transistor_episode_v1",
    ctor: build_replicator,
    feature_roles: [],
    resource_name_singular: "Transistor Episode",
    supports_backfill: true,
    api_docs_url: "https://developers.transistor.fm/#Episode",
  )
end

Instance Method Details

#_denormalized_columns ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 23

# Columns copied out of the episode resource into their own table columns.
#
# Most values are read from the resource's "attributes" object; +show_id+ is
# read from the "relationships" object. The trailing optional columns have no
# data key here and are filled in elsewhere in this class.
#
# @return [Array<Webhookdb::Replicator::Column>]
def _denormalized_columns
  column = Webhookdb::Replicator::Column
  # Key path of a field nested directly under "attributes".
  attribute = ->(field) { ["attributes", field] }
  return [
    column.new(:author, TEXT, data_key: attribute.call("author")),
    column.new(:created_at, TIMESTAMP, index: true, data_key: attribute.call("created_at")),
    column.new(:duration, INTEGER, data_key: attribute.call("duration")),
    column.new(:keywords, TEXT, data_key: attribute.call("keywords")),
    column.new(:number, INTEGER, index: true, data_key: attribute.call("number")),
    column.new(:published_at, TIMESTAMP, index: true, data_key: attribute.call("published_at")),
    column.new(:season, INTEGER, index: true, data_key: attribute.call("season")),
    column.new(:show_id, TEXT, index: true, data_key: ["relationships", "show", "data", "id"]),
    column.new(:status, TEXT, data_key: attribute.call("status")),
    column.new(:title, TEXT, data_key: attribute.call("title")),
    column.new(:type, TEXT, data_key: attribute.call("type")),
    column.new(:updated_at, TIMESTAMP, index: true, data_key: attribute.call("updated_at")),

    column.new(:transcript_text, TEXT, optional: true),

    # Ideally these would have converters, but they'd be very confusing, and when this was built
    # we only had one transistor user, so we truncated the table instead.
    column.new(:api_format, INTEGER, optional: true),
    column.new(:logical_summary, TEXT, optional: true),
    column.new(:logical_description, TEXT, optional: true),
  ]
end

#_extract_first_html_line_as_text(element) ⇒ Object

Usually the Transistor HTML looks like <div>foo<br><br>hello</div>. Extract ‘foo’ as text, remove the leading <br> tags, and return <div>hello</div>.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 105

# Pull the first "line" out of the first div/p under +element+, mutating the tree.
#
# Consumed nodes are removed from +element+, so whatever is left afterwards is
# the remainder that follows the first line.
#
# @param element [#css] parsed HTML node or fragment to search and mutate.
# @return [String, nil] the stripped first line (text nodes as text, inline
#   elements as HTML), or nil when there is no div or p element at all.
def _extract_first_html_line_as_text(element)
  container = element.css("div, p").first
  return nil unless container
  line = +""
  # Walk a snapshot of the children, since nodes are removed as we go.
  container.children.to_a.each do |node|
    if node.is_a?(Nokogiri::XML::Text)
      # Plain text always belongs to the first line.
      line << node.inner_text
      node.remove
      next
    end
    case node.name
    when "br"
      # A line break ends the first line. Drop any directly following br tags
      # too, which acts like trimming leading whitespace off the remainder.
      while (following = node.next) && following.name == "br"
        following.remove
      end
      node.remove
      break
    when *BLOCK_ELEMENT_TAGS
      # A nested block element also ends the first line, and is left in place.
      break
    else
      # Anything else is presumably inline styling, so keep its markup.
      line << node.to_html
      node.remove
    end
  end
  # If everything was consumed, do not leave an empty wrapper behind.
  container.remove if container.inner_text.blank?
  return line.strip
end

#_fetch_backfill_page(pagination_token, last_backfilled:) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 167

# Fetch one page of episodes from the Transistor API for a backfill.
#
# @param pagination_token [Integer, nil] page number to request; blank means page 1.
# @param last_backfilled [Time, String, nil] when present, paging stops once the
#   last episode on the page was created before this moment.
# @return [Array] two elements: the page's episodes, and the next page number
#   (nil when there is no further page to fetch).
# @raise [ArgumentError] if a string timestamp cannot be parsed.
def _fetch_backfill_page(pagination_token, last_backfilled:)
  url = "https://api.transistor.fm/v1/episodes"
  pagination_token = 1 if pagination_token.blank?
  response = Webhookdb::Http.get(
    url,
    headers: {"x-api-key" => self.service_integration.backfill_key},
    body: {pagination: {page: pagination_token, per: 500}},
    logger: self.logger,
    timeout: Webhookdb::Transistor.http_timeout,
  )
  data = response.parsed_response
  episodes = data["data"]
  # Normalize both counters the same way so they are compared numerically even
  # if they arrive as strings. Integer() still fails loudly on a missing value.
  current_page = Integer(data["meta"]["currentPage"])
  total_pages = Integer(data["meta"]["totalPages"])
  next_page = (current_page + 1 if current_page < total_pages)

  if last_backfilled.present?
    # created_at is taken straight from the response body, so it may be a String.
    # Comparing a String against a Time raises, so coerce strings to Time first.
    as_time = ->(value) { value.is_a?(String) ? Time.parse(value) : value }
    # NOTE(review): using the last episode as the earliest assumes the API
    # returns newest-first; confirm against the Transistor docs.
    earliest_data_created =
      episodes.empty? ? Time.at(0) : as_time.call(episodes[-1].dig("attributes", "created_at"))
    paged_to_already_seen_records = earliest_data_created < as_time.call(last_backfilled)

    return episodes, nil if paged_to_already_seen_records
  end

  return episodes, next_page
end

#_fetch_enrichment(resource) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 140

# Download the plain-text transcript for an episode, if it has one.
#
# @param resource [Hash] episode resource; must have an "attributes" key.
# @return [Hash, nil] +{transcript_text: body}+, or nil when the episode has no
#   transcript URL or the transcript request responds with a 404.
# @raise [Webhookdb::Http::Error] for any request failure other than a 404.
def _fetch_enrichment(resource, *)
  url = resource.fetch("attributes").fetch("transcript_url", nil)
  return nil if url.blank?
  # The text version of the transcript is requested via a .txt suffix.
  url = "#{url}.txt" unless url.end_with?(".txt")
  response =
    begin
      Webhookdb::Http.get(
        url,
        logger: self.logger,
        timeout: Webhookdb::Transistor.http_timeout,
      )
    rescue Webhookdb::Http::Error => e
      # Not sure why this happens, but nothing we can do if it does.
      raise e unless e.status == 404
      return nil
    end
  return {transcript_text: response.body}
end

#_prepare_for_insert(resource, event, request, enrichment) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 68

# Extend the inherited insert row with the api_format, logical_summary and
# logical_description values, then merge in any enrichment.
#
# Transistor merged their summary and description fields in February 2023, so
# they are authored as one big 'description' HTML blob. Before that, summary
# and description were separate fields (we call this api_format 1).
#
# A nil summary means the 'new' format (api_format 2). In that case the first
# line of the description HTML is treated as the summary, and anything left in
# the HTML is the remaining description. Some care is paid to whitespace, too,
# since <br> tags can be used within an element.
#
# @param resource [Hash] episode resource; must have an "attributes" key.
# @param enrichment [Hash, nil] extra values merged over the row when present.
# @return [Hash] the row to insert.
def _prepare_for_insert(resource, event, request, enrichment)
  row = super
  attributes = resource.fetch("attributes")
  summary = attributes.fetch("summary", nil)
  description = attributes.fetch("description", nil)
  case summary
  when nil
    row[:api_format] = 2
    fragment = Nokogiri::HTML5.fragment(description)

    # This call strips the first line out of the fragment as a side effect.
    first_line = self._extract_first_html_line_as_text(fragment)
    row[:logical_description] = nil
    if first_line
      row[:logical_summary] = first_line
      leftover_present = fragment.inner_text.present?
      row[:logical_description] = fragment.to_s.strip if leftover_present
    else
      # No first line could be extracted, so the whole blob is the summary.
      row[:logical_summary] = fragment.to_s.strip
    end
  else
    row[:logical_summary] = summary
    row[:logical_description] = description
    row[:api_format] = 1
  end
  row.merge!(enrichment) if enrichment
  return row
end

#parse_date_from_api(date_string) ⇒ Object



163
164
165
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 163

# Parse a day-month-year date string, such as "25-12-2023".
#
# @param date_string [String] date in DD-MM-YYYY form.
# @return [Time]
# @raise [ArgumentError] if the string does not match the format.
def parse_date_from_api(date_string)
  day_month_year = "%d-%m-%Y"
  return Time.strptime(date_string, day_month_year)
end

#upsert_has_deps? ⇒ Boolean

Returns:

  • (Boolean)


159
160
161
# File 'lib/webhookdb/replicator/transistor_episode_v1.rb', line 159

# Whether upserting a row for this replicator has dependencies.
#
# @return [Boolean] always true for this replicator.
def upsert_has_deps?
  true
end