Class: Textract::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/textract.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, selectors) ⇒ Client

Returns a new instance of Client.


69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/textract.rb', line 69

# Builds a client by downloading +url+ and extracting article data from it.
#
# The page is requested through a Mechanize agent whose user agent alias is
# set to 'Mac Safari'. The response content is kept in @html and handed to
# the Textract helpers, whose results populate @tags, @article, @text,
# @author and @title.
#
# @param url [String] address of the page, passed to Mechanize#get
#   (presumably a URL string; confirm against callers)
# @param selectors [Object] forwarded unchanged to
#   Textract.get_text_from_description; its expected shape is not visible here
# @return [Client] a new instance of Client
def initialize(url, selectors)
  @url = url

  browser = Mechanize.new.tap { |mech| mech.user_agent_alias = 'Mac Safari' }
  @html = browser.get(url).content
  @tags = Textract.get_og_tags(@html)

  @article = Textract.get_text_from_description(@html, @tags.description, selectors)
  @text = ReverseMarkdown.convert(@article.content, unknown_tags: :bypass)

  # Values found on the article / tags win; the helpers that take the raw
  # HTML are only consulted when those values are nil or false.
  @author = @article.author || Textract.get_author(@html)
  @title = @tags.title || Textract.get_page_title(@html)
end

Instance Attribute Details

#author ⇒ Object (readonly)

Returns the value of attribute author.


67
68
69
# File 'lib/textract.rb', line 67

# Reader for @author.
#
# The constructor sets it to the extracted article's author, or to the
# result of Textract.get_author(@html) when that is nil or false.
#
# @return [Object] the value of attribute author
def author
  @author
end

#html ⇒ Object (readonly)

Returns the value of attribute html.


62
63
64
# File 'lib/textract.rb', line 62

# Reader for @html.
#
# The constructor sets it to the content of the response that the
# Mechanize agent returned for the client's url.
#
# @return [Object] the value of attribute html
def html
  @html
end

#tags ⇒ Object (readonly)

Returns the value of attribute tags.


64
65
66
# File 'lib/textract.rb', line 64

# Reader for @tags.
#
# The constructor sets it to the result of Textract.get_og_tags(@html);
# that object is later asked for +description+ and +title+.
#
# @return [Object] the value of attribute tags
def tags
  @tags
end

#text ⇒ Object (readonly)

Returns the value of attribute text.


66
67
68
# File 'lib/textract.rb', line 66

# Reader for @text.
#
# The constructor sets it to the result of ReverseMarkdown.convert applied
# to the extracted article's content with unknown_tags: :bypass.
#
# @return [Object] the value of attribute text
def text
  @text
end

#title ⇒ Object (readonly)

Returns the value of attribute title.


65
66
67
# File 'lib/textract.rb', line 65

# Reader for @title.
#
# The constructor sets it to @tags.title, or to the result of
# Textract.get_page_title(@html) when that is nil or false.
#
# @return [Object] the value of attribute title
def title
  @title
end

#url ⇒ Object (readonly)

Returns the value of attribute url.


63
64
65
# File 'lib/textract.rb', line 63

# Reader for @url.
#
# Holds the +url+ argument exactly as it was passed to the constructor.
#
# @return [Object] the value of attribute url
def url
  @url
end