Class: KCNA

Inherits:
Object
  • Object
show all
Defined in:
lib/kcna.rb,
lib/kcna/version.rb

Overview

KCNA provides several methods for accessing KCNA resources.

Defined Under Namespace

Classes: Article

Constant Summary collapse

KO =
"kor"
EN =
"eng"
ZH =
"chn"
RU =
"rus"
ES =
"spn"
JA =
"jpn"
VERSION =
"0.3.0"

Instance Method Summary collapse

Constructor Details

#initialize ⇒ KCNA

Returns a new instance of KCNA.


17
18
19
# File 'lib/kcna.rb', line 17

# Builds a new KCNA instance and stores a fresh HTTPClient in @client.
def initialize
  @client = HTTPClient.new
end

Instance Method Details

#get_article(article_id) ⇒ KCNA::Article

Fetches the article by article ID. The content of the article is already processed by #normalize_text, so you don’t have to do it yourself.

Parameters:

  • article_id (String)

    article ID.

Returns:


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/kcna.rb', line 74

# Fetches a single article by its ID and builds a KCNA::Article from the
# XML response returned by fetch_article.
#
# The content and the three title fields are passed through #normalize_text;
# the media counts are converted to integers and the creation date is parsed
# from the "%Y.%m.%d" format.
#
# @param article_id [String] article ID.
# @return [KCNA::Article]
# @raise [RuntimeError] "Article not found" when the response contains no
#   <NData> element, or when that element has no child elements.
def get_article(article_id)
  doc = REXML::Document.new(fetch_article(article_id))
  container = REXML::XPath.first(doc, "//NData")
  # XPath.first returns nil when nothing matches; report that as a missing
  # article too instead of failing with NoMethodError on nil.
  raise "Article not found" if container.nil? || container.elements.size.zero?

  # Text of the first element matching the given XPath expression.
  text_at = ->(path) { REXML::XPath.first(doc, path).text }

  date = Date.strptime(text_at.call("//articleCreateDate"), "%Y.%m.%d")
  content = normalize_text(text_at.call("//content"))
  display_title = normalize_text(text_at.call("//dispTitle"))
  main_title = normalize_text(text_at.call("//mainTitle"))
  sub_title = normalize_text(text_at.call("//subTitle"))
  # The ID reported by the response is what ends up on the Article, so keep it
  # in its own local rather than overwriting the method parameter.
  article_code = text_at.call("//articleCode")
  movie_count = text_at.call("//fMovieCnt").to_i
  photo_count = text_at.call("//fPhotoCnt").to_i
  music_count = text_at.call("//fMusicCnt").to_i

  Article.new(
    article_code, content: content,
    date: date,
    main_title: main_title, sub_title: sub_title, display_title: display_title,
    movie_count: movie_count, photo_count: photo_count, music_count: music_count
  )
end

#get_article_list(start = 0, news_type: "", from_date: "", to_date: "", title_keyword: "", content_keyword: "") ⇒ Array<KCNA::Article>

Fetches a list of articles.

Parameters:

  • start (Integer) (defaults to: 0)

    Index number for pagination.

  • news_type (String) (defaults to: "")

    news type.

  • from_date (Date, String) (defaults to: "")

    This method searches for articles after this date.

  • to_date (Date, String) (defaults to: "")

    This method searches for articles before this date.

  • title_keyword (String) (defaults to: "")

    search keyword for title.

  • content_keyword (String) (defaults to: "")

    keyword for full-text search of the articles.

Returns:


110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/kcna.rb', line 110

# Fetches one page of the article list and turns it into Article objects.
#
# Each field is collected document-wide as its own column (all article codes,
# all display titles, ...) and the columns are then combined by position, so
# the n-th Article is built from the n-th match of every field.
#
# @param start [Integer] index number for pagination.
# @param news_type [String] news type.
# @param from_date [Date, String] lower date bound; non-String values are
#   converted with #to_s before being sent.
# @param to_date [Date, String] upper date bound; non-String values are
#   converted with #to_s before being sent.
# @param title_keyword [String] search keyword for the title.
# @param content_keyword [String] keyword for full-text search.
# @return [Array<KCNA::Article>]
def get_article_list(start = 0, news_type: "", from_date: "", to_date: "", title_keyword: "", content_keyword: "")
  from_date = from_date.is_a?(String) ? from_date : from_date.to_s
  to_date = to_date.is_a?(String) ? to_date : to_date.to_s

  xml = fetch_article_list(start, news_type, from_date, to_date, title_keyword, content_keyword)
  doc = REXML::Document.new(xml)

  # Column extractors: raw text, normalized text, and integer counts.
  raw_column = ->(path) { REXML::XPath.match(doc, path).map(&:text) }
  title_column = ->(path) { REXML::XPath.match(doc, path).map { |node| normalize_text(node.text) } }
  count_column = ->(path) { REXML::XPath.match(doc, path).map { |node| node.text.to_i } }

  ids = raw_column.call("//articleCode")
  display_titles = title_column.call("//dispTitle")
  main_titles = title_column.call("//mainTitle")
  sub_titles = title_column.call("//subTitle")
  send_dates = raw_column.call("//sendInfo")
  movie_counts = count_column.call("//fMovieCnt")
  music_counts = count_column.call("//fMusicCnt")
  photo_counts = count_column.call("//fPhotoCnt")

  ids.each_with_index.map do |id, index|
    # NOTE(review): article AR0060168 gets a fixed date in place of whatever
    # the response carries — presumably the upstream value is unusable; confirm.
    sent_on = id == "AR0060168" ? "2015-04-02" : send_dates[index]
    Article.new(
      id, date: Date.parse(sent_on),
      display_title: display_titles[index], main_title: main_titles[index], sub_title: sub_titles[index],
      movie_count: movie_counts[index], music_count: music_counts[index], photo_count: photo_counts[index]
    )
  end
end

#normalize_text(content) ⇒ Object

Processes raw article content. This method strips HTML tags and trailing unnecessary strings.


27
28
29
30
31
32
33
34
35
36
37
# File 'lib/kcna.rb', line 27

# Cleans up raw article text.
#
# Literal newlines and "&nbsp;" entities are dropped, "<br>" becomes a
# newline, a trailing run of three dashes (in any of the three dash
# characters matched below, with surrounding whitespace) is removed, and the
# result is handed to strip_html.
#
# @param content [String] raw text taken from the response.
# @return [Object] whatever strip_html returns for the cleaned text.
def normalize_text(content)
  substitutions = { "\n" => "", "&nbsp;" => "", "<br>" => "\n" }
  without_breaks = content.gsub(/\n|<br>|&nbsp;/, substitutions)
  without_trailer = without_breaks.sub(/\s*(---|‐‐‐|―――)\s*\z/, "")
  strip_html(without_trailer)
end

#set_language(lang) ⇒ Object

Sets the response language by sending a request to kcna.kp.

Parameters:

  • lang (String)

    the language code. One of KCNA::KO, KCNA::EN, KCNA::ZH, KCNA::RU, KCNA::ES, and KCNA::JA.


55
56
57
58
59
60
61
62
# File 'lib/kcna.rb', line 55

# Posts the home-info request with the given language code.
#
# Every form field other than :lang is sent as an empty string.
#
# @param lang [String] the language code, e.g. one of the KCNA language
#   constants.
# @return [Object] the result of the underlying post call.
def set_language(lang)
  blank_fields = %i[article_code article_type_list news_type_code show_what mediaCode]
  form = blank_fields.each_with_object({}) { |field, params| params[field] = "" }
  form[:lang] = lang
  # httpclient takes care of cookies on its own, so nothing else is kept here.
  post("/kcna.user.home.retrieveHomeInfoList.kcmsf", form)
end