Class: GamesAndRpgParadise::DSA::ObtainInformationForDsaAdventureBooks

Inherits:
Base
  • Object
show all
Defined in:
lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb

Overview

GamesAndRpgParadise::DSA::ObtainInformationForDsaAdventureBooks

Constant Summary collapse

NAMESPACE =
#

NAMESPACE

#
inspect
DOWNLOAD_N_PAGES =
#

DOWNLOAD_N_PAGES

#
100
REMOTE_URL =
#

REMOTE_URL

The upstream URL has changed; for example, the products are now listed under ulisses-spiele.de/spielsysteme/dsa5/.

#
'https://ulisses-spiele.de/produkte/'
START_POINT =
#

START_POINT

#
'802'
END_POINT =
#

END_POINT

#
'700'
STORE_INTO_THIS_DIRECTORY =
#

STORE_INTO_THIS_DIRECTORY

#
'/home/x/Temp/dsa/'
SHALL_WE_DOWNLOAD_THE_HTML_FILES =
#

SHALL_WE_DOWNLOAD_THE_HTML_FILES

#
false
HASH_MONTH_NAMES =
#

HASH_MONTH_NAMES

#
{
   '1' => 'Jan',
   '2' => 'Feb',
   '3' => 'Mar',
   '4' => 'Apr',
   '5' => 'May',
   '6' => 'Jun',
   '7' => 'Jul',
   '8' => 'Aug',
   '9' => 'Sep',
  '10' => 'Oct',
  '11' => 'Nov',
  '12' => 'Dec'
}

Constants included from Base::Extensions::Colours

Base::Extensions::Colours::ARRAY_AVAILABLE_KONSOLE_COLOURS

Constants included from CommonExtensions

CommonExtensions::CONTROL_C_CODE, CommonExtensions::N

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Base::Extensions::Colours

ecomment, #efancy, #eparse, #forestgreen, #gold, #grey, #lightblue, #mediumseagreen, #mediumslateblue, #peru, #rev, sdir, sfancy, #sfile, simp, #teal, #yellow

Methods included from Base::Extensions::CommandlineArguments

#commandline_arguments?, #filter_away_commandline_arguments, #first_argument?, #first_non_hyphened_argument?, #set_commandline_arguments

Methods included from CommonExtensions

#cat, #cd, #cliner, #copy_file, #delete, #dirname_but_retains_the_trailing_slash, #disable_colours, #ensure_that_the_log_directory_exists, #esystem, #get_user_input, #infer_the_namespace, #is_on_roebe?, #log_dir?, #mkdir, #mkdir_then_cd_into_it, #mv, #namespace?, #opne, #opnn, #project_base_directory?, #project_image_directory?, #project_yaml_directory?, #rds, #register_sigint, #remove_this_directory, #rename_file, #reset_the_internal_hash, #return_pwd, #return_today, #touch_file, #wrap, #write_what_into

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ ObtainInformationForDsaAdventureBooks

#

initialize

#


88
89
90
91
92
93
94
95
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 88

# Sets up a new instance.
#
# The internal state is reset first, then the commandline arguments are
# stored, and finally - unless the caller opted out - the instance is run.
#
# @param i           the commandline arguments (ARGV by default)
# @param run_already whether run should be invoked right away
def initialize(i = ARGV, run_already = true)
  reset
  set_commandline_arguments(i)
  if run_already
    run
  end
end

Class Method Details

.[](i = '') ⇒ Object

#

GamesAndRpgParadise::DSA::ObtainInformationForDsaAdventureBooks[]

#


248
249
250
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 248

# Shorthand constructor: forwards +i+ to new and returns the new instance.
#
# Only one argument is passed on, so the second parameter of initialize
# (run_already) keeps its default of true and the instance runs at once.
#
# @param i the commandline arguments handed to the new instance
def self.[](i = '')
  new(i)
end

Instance Method Details

#download_the_remote_dataset_into_local_html_files(start_point = START_POINT.to_i, end_point = END_POINT.to_i) ⇒ Object

#

download_the_remote_dataset_into_local_html_files

First, batch-download the entire remote dataset.

#


160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 160

# Batch-downloads the remote product pages into local .html files.
#
# Counts down from start_point to end_point (both inclusive). For every
# number, the URL REMOTE_URL + number is handed to Cyberweb::ScrapeThisUrl
# and stored as "<number>.html" inside base_dir?.
#
# The scraper is an optional dependency. If it cannot be loaded and the
# constant Cyberweb::ScrapeThisUrl is not already defined, a message is
# shown and the method returns nil rather than failing later with a
# NameError.
#
# @param start_point [Integer] the first (highest) number to download
# @param end_point   [Integer] the last (lowest) number to download
# @return the result of the final status message, or nil if the scraper
#         is unavailable
def download_the_remote_dataset_into_local_html_files(
    start_point = START_POINT.to_i,
    end_point   = END_POINT.to_i
  )
  begin
    require 'cyberweb/web_scraper/scrape_this_url.rb'
  rescue LoadError
    # Not fatal by itself: the constant may have been loaded by other
    # means. The check below decides whether we can continue.
  end
  unless defined?(Cyberweb::ScrapeThisUrl)
    opnn; e 'Cyberweb::ScrapeThisUrl is not available (the file '\
            'cyberweb/web_scraper/scrape_this_url.rb could not be '\
            'loaded) - skipping the download.'
    return nil
  end
  target_directory = base_dir?
  start_point.downto(end_point) {|counter|
    full_remote_url = "#{REMOTE_URL}#{counter}"
    e royalblue(full_remote_url)
    Cyberweb::ScrapeThisUrl.set_start_urls(full_remote_url)
    store_here = "#{target_directory}#{counter}.html"
    opnn; e "Will store into the file `#{sfile(store_here)}`."
    Cyberweb::ScrapeThisUrl.set_store_into_this_local_file(store_here)
    Cyberweb::ScrapeThisUrl.crawl!
  }
  e 'All downloaded .html files should be available in: '+
    sdir(target_directory)
end
#

menu (menu tag)

#


136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 136

# Interprets the commandline arguments.
#
# An Array is processed element by element (recursively, so nested Arrays
# work too). Any other input is compared against the known flags.
#
# Supported flags:
#
#   obtain_information_for_dsa_adventure_books --download
#   obtaindsa --download
#
# Up to two leading hyphens are optional. When the flag matches, the
# instance is told to download the remote .html files again.
def menu(i)
  return i.each {|entry| menu(entry) } if i.is_a?(Array)
  # ======================================================================= #
  # === --download
  # ======================================================================= #
  @download_the_html_files = true if /^-?-?download$/ === i
end

#obtain_the_relevant_dataset_from_the_local_html_files ⇒ Object

#

obtain_the_relevant_dataset_from_the_local_html_files

#


200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 200

# Extracts the book title and the release date from every locally stored
# .html file and displays one summary line per file.
#
# The files are taken from store_into_this_directory and are processed in
# reverse (descending) filename order. Each line shown has this layout:
#
#   <title, padded to 34 chars> |  ---   |    <month year> | <remote url>
#
# Requires cyberweb/toplevel_methods/umlaute.rb; a LoadError is raised if
# that file is not available.
#
# @return [Array<String>] the paths of the .html files that were processed
def obtain_the_relevant_dataset_from_the_local_html_files
  opnn; e 'Next trying to obtain the relevant dataset from '+
           sdir(store_into_this_directory)
  require 'cyberweb/toplevel_methods/umlaute.rb'
  # ======================================================================= #
  # First define the Regexes that we are going to use:
  # ======================================================================= #
  regex_for_title_of_the_book =
    /<h2 class="productTitel">(.+)<\/h2>/
  # ======================================================================= #
  # Regex to capture the release date. It has two captures: the month,
  # either as a short name ("Mar") or as a number ("3"), and the
  # four-digit year.
  # ======================================================================= #
  regex_for_release_date =
    /<span class="news-element-release-month cf1" style="font-size:2.2em;font-weight:bold;">([A-Za-z]{1,3}|\d{1,2})<\/span><span class="news-element-release-year">(\d{4})<\/span>/
  work_on_these_files = Dir["#{store_into_this_directory}*.html"].sort.reverse # Keep it sorted; then reverse it.
  work_on_these_files.each {|this_html_file|
    dataset = File.read(this_html_file)
    # ===================================================================== #
    # The title may have oddly encoded characters, so sanitize them
    # first. Padding happens afterwards, so that replaced entities do
    # not shorten the column.
    # ===================================================================== #
    title = dataset[regex_for_title_of_the_book, 1].to_s
    title = Cyberweb.convert_back_german_umlauts(title)
    title = title.gsub('&amp;', '&').ljust(34)
    release_match = regex_for_release_date.match(dataset)
    month = release_match ? release_match[1] : ''
    year  = release_match ? release_match[2] : ''
    # ===================================================================== #
    # Convert a numeric month into its name. A month that already is a
    # name (or that is unknown) is kept as it is rather than being lost.
    # ===================================================================== #
    month = HASH_MONTH_NAMES.fetch(month.to_i.to_s, month)
    release_date = '   '+"#{month} #{year}".ljust(14)
    remote_url = REMOTE_URL+File.basename(this_html_file).sub(/\.html$/,'')
    # ===================================================================== #
    # Next, build up the result-string:
    # ===================================================================== #
    result = title+' | '+
             ' ---   | '+
             release_date+' | '+
             remote_url
    e result # <- And display the result finally.
  }
end

#reset ⇒ Object

#

reset (reset tag)

#


100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 100

# Puts the instance variables of this class into their initial state.
def reset
  # ======================================================================= #
  # === @counter
  #
  # Begins at the start point, converted into an Integer.
  # ======================================================================= #
  @counter = start_point?.to_i
  # ======================================================================= #
  # === @namespace
  # ======================================================================= #
  @namespace = NAMESPACE
  # ======================================================================= #
  # === @download_the_html_files
  #
  # Whether the remote dataset shall be downloaded again. The default
  # comes from a constant; it can be toggled manually later on.
  # ======================================================================= #
  @download_the_html_files = SHALL_WE_DOWNLOAD_THE_HTML_FILES
end

#run ⇒ Object

#

run (run tag)

#


183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 183

# Entry point of the class.
#
# Makes sure that the storage directory exists, evaluates the commandline
# arguments, downloads the remote pages if this was requested, and then
# changes into the storage directory in order to evaluate the local .html
# files.
def run
  unless File.directory?(store_into_this_directory)
    mkdir(store_into_this_directory)
  end
  menu(commandline_arguments?)
  download_the_remote_dataset_into_local_html_files if @download_the_html_files
  # ======================================================================= #
  # From here on we only work with the .html files that are stored
  # locally.
  # ======================================================================= #
  cd(store_into_this_directory)
  obtain_the_relevant_dataset_from_the_local_html_files
end

#start_point? ⇒ String

#

start_point?

#

Returns:

  • (String) — the value of the START_POINT constant


121
122
123
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 121

# Returns the START_POINT constant unchanged.
#
# Despite the trailing "?" this is a plain reader and not a predicate:
# START_POINT is a String such as '802', so callers that need a number
# have to convert the result themselves (reset calls .to_i on it).
#
# @return [String] the value of START_POINT
def start_point?
  START_POINT
end

#store_into_this_directory ⇒ Object Also known as: base_dir?, base_dir

#

store_into_this_directory

#


128
129
130
# File 'lib/games_and_rpg_paradise/rpg/dsa/obtain_information_for_dsa_adventure_books.rb', line 128

# Returns the STORE_INTO_THIS_DIRECTORY constant unchanged, that is the
# directory in which the .html files are kept.
#
# Other methods of this class append file names directly to the returned
# value (e. g. "#{store_into_this_directory}*.html"), so the path is
# expected to end with a trailing slash.
def store_into_this_directory
  STORE_INTO_THIS_DIRECTORY
end