Module: Html2rss
- Defined in:
- lib/html2rss.rb,
lib/html2rss/cli.rb,
lib/html2rss/item.rb,
lib/html2rss/utils.rb,
lib/html2rss/config.rb,
lib/html2rss/version.rb,
lib/html2rss/auto_source.rb,
lib/html2rss/rss_builder.rb,
lib/html2rss/config/channel.rb,
lib/html2rss/item_extractors.rb,
lib/html2rss/config/selectors.rb,
lib/html2rss/rss_builder/item.rb,
lib/html2rss/auto_source/article.rb,
lib/html2rss/auto_source/channel.rb,
lib/html2rss/auto_source/cleanup.rb,
lib/html2rss/auto_source/reducer.rb,
lib/html2rss/auto_source/scraper.rb,
lib/html2rss/rss_builder/channel.rb,
lib/html2rss/item_extractors/href.rb,
lib/html2rss/item_extractors/html.rb,
lib/html2rss/item_extractors/text.rb,
lib/html2rss/item_extractors/static.rb,
lib/html2rss/rss_builder/stylesheet.rb,
lib/html2rss/auto_source/rss_builder.rb,
lib/html2rss/object_to_xml_converter.rb,
lib/html2rss/auto_source/scraper/html.rb,
lib/html2rss/attribute_post_processors.rb,
lib/html2rss/item_extractors/attribute.rb,
lib/html2rss/auto_source/scraper/schema.rb,
lib/html2rss/attribute_post_processors/base.rb,
lib/html2rss/attribute_post_processors/gsub.rb,
lib/html2rss/auto_source/scraper/schema/base.rb,
lib/html2rss/auto_source/scraper/semantic_html.rb,
lib/html2rss/attribute_post_processors/template.rb,
lib/html2rss/attribute_post_processors/parse_uri.rb,
lib/html2rss/attribute_post_processors/substring.rb,
lib/html2rss/attribute_post_processors/parse_time.rb,
lib/html2rss/attribute_post_processors/sanitize_html.rb,
lib/html2rss/auto_source/scraper/semantic_html/image.rb,
lib/html2rss/attribute_post_processors/html_to_markdown.rb,
lib/html2rss/attribute_post_processors/markdown_to_html.rb,
lib/html2rss/auto_source/scraper/semantic_html/extractor.rb,
lib/html2rss/attribute_post_processors/html_transformers/wrap_img_in_a.rb,
lib/html2rss/attribute_post_processors/html_transformers/transform_urls_to_absolute_ones.rb
Overview
The Html2rss namespace.
Defined Under Namespace
Modules: AttributePostProcessors, ItemExtractors, RssBuilder, Utils
Classes: AutoSource, CLI, Config, Error, Item, ObjectToXmlConverter
Constant Summary
- Log = Logger.new($stderr)
  The logger instance.
- CONFIG_KEY_FEEDS = :feeds
  Key for the feeds configuration in the YAML file.
- VERSION = '0.15.0'
Class Method Summary
- .auto_source(url) ⇒ RSS::Rss
  Scrapes the provided URL and returns an RSS object.
- .feed(config) ⇒ RSS::Rss
  Returns an RSS object generated from the provided configuration.
- .feed_from_yaml_config(file, name = nil, global_config: {}, params: {}) ⇒ RSS::Rss
  Returns an RSS object generated from the provided YAML file configuration.
Class Method Details
.auto_source(url) ⇒ RSS::Rss
Scrapes the provided URL and returns an RSS object; no “feed config” is required.
103 104 105 106 107 108 109 |
# File 'lib/html2rss.rb', line 103 def self.auto_source(url) url = Addressable::URI.parse(url) response = Html2rss::Utils.request_url(url) Html2rss::AutoSource.new(url, body: response.body, headers: response.headers).build end |
.feed(config) ⇒ RSS::Rss
Returns an RSS object generated from the provided configuration.
Example:
feed = Html2rss.feed(
channel: { name: 'StackOverflow: Hot Network Questions', url: 'https://stackoverflow.com' },
selectors: {
items: { selector: '#hot-network-questions > ul > li' },
title: { selector: 'a' },
link: { selector: 'a', extractor: 'href' }
}
)
# => #<RSS::Rss:0x00007fb2f48d14a0 ...>
72 73 74 75 |
# File 'lib/html2rss.rb', line 72 def self.feed(config) config = Config.new(config) unless config.is_a?(Config) RssBuilder.build(config) end |
.feed_from_yaml_config(file, name = nil, global_config: {}, params: {}) ⇒ RSS::Rss
Returns an RSS object generated from the provided YAML file configuration.
Example:
feed = Html2rss.feed_from_yaml_config(File.join(['spec', 'config.test.yml']), 'nuxt-releases')
# => #<RSS::Rss:0x00007fb2f6331228 ...>
46 47 48 49 50 51 52 53 |
# File 'lib/html2rss.rb', line 46 def self.feed_from_yaml_config(file, name = nil, global_config: {}, params: {}) yaml = YAML.safe_load_file(file, symbolize_names: true) feeds = yaml[CONFIG_KEY_FEEDS] || {} feed_config = find_feed_config(yaml, feeds, name, global_config) feed(Config.new(feed_config, global_config, params)) end |