Class: Grabepg::GrabTvmao

Inherits:

Object

Object
Grabepg::GrabTvmao

Defined in:: lib/grab_tvmao.rb

Constant Summary collapse

DEFAULT_GrabtvType =

["cctv","satellite","digital",]

DEFAULT_SITE =

"http://www.tvmao.com"

Instance Attribute Summary collapse

#channel ⇒ Object readonly

图片的获取： Net::HTTP.get(url) 图片的文件类型获取：.
#img_down_path ⇒ Object readonly

图片下载路径存放.
#proxyindex ⇒ Object readonly

代理的索引.
#show_schedule ⇒ Object readonly

根据节目的时间表.
#site ⇒ Object readonly

网站地址.

Instance Method Summary collapse

#conversion_what_day(whatday) ⇒ Object

将星期的 wday 数值转化为中文星期名 (converts a wday number to its Chinese weekday name).
#del_day_num(day_num) ⇒ Object

前几天需要减去的num.
#del_time_list ⇒ Object

前面一周要删除的日期的列表.
#dispose_time(num) ⇒ Object

如果时间为1~9的一位则为其在数字前加0补齐二位.
#err_doc_proxy(proxy, proxylist, url = "", err = "") ⇒ Object
#get_assign_date_url(url, start_time, day_num) ⇒ Object

获取指定时间段的url列表：start_time 为int型，表示开始时间与今天的差值（正数代表今天之后的第几天，负数代表今天之前的第几天）；day_num 为int型，代表从开始时间算起要抓取的天数.
#get_doc_with_proxy(proxylist, url) ⇒ Object

使用代理获取url的html的doc值.
#get_proxy_list ⇒ Object

获取代理列表.
#get_schedulelist_atday(channel, url, proxylist) ⇒ Object

获取某天的节目表.
#get_show_infomation(proxy_list, schedule_herf) ⇒ Object

获取节目详细信息.
#get_show_schedule(proxylist, herf) ⇒ Object

获取节目的时间表.
#get_show_type(channel, url, date, time, proxylist) ⇒ Object

从tvmao获取指定开始时间的节目类型：channel 节目表所属的频道；url 节目表的网络地址；date 日期；time 节目开始时间；proxylist 代理列表.
#get_show_type_by_batch(channel, url, date, schedule, proxylist) ⇒ Object

批量从tvmao获取节目类型：channel 节目表所属的频道；url 节目表的网络地址；date 日期；schedule 需要批量修改的时间表；proxylist 代理列表.
#get_show_type_url(url, date) ⇒ Object
#get_time_day_prior(num) ⇒ Object

获取距离当前多少天的之前的日期.
#get_topfast_list(use_time) ⇒ Object

获取指定访问速度的代理服务器 time为最慢速度的时间 int型代表秒.
#get_week_date_time(time) ⇒ Object

转化当前时间的格式.
#getchannels(img_dir_path) ⇒ Object

获取网站的频道表 img_path 图片存放路径.
#getschedule(channel, herf, proxylist, day_num = 7, img_dir_down_path = @img_down_dir_path) ⇒ Object

因原已调用所以保留获取一周节目表.
#getScheduleAssignDate(channel, herf, proxylist, start_num, day_num = 0, img_dir_down_path = @img_down_dir_path) ⇒ Object

获取指定时间段的节目表.
#gg(url) ⇒ Object
#initialize ⇒ GrabTvmao constructor

A new instance of GrabTvmao.
#save_img ⇒ Object
#start ⇒ Object

调用此方法的例子.

Constructor Details

#initialize ⇒ `GrabTvmao`

Returns a new instance of GrabTvmao.



29
30
31

# File 'lib/grab_tvmao.rb', line 29

# Creates a new grabber and stores a fresh GrabBase instance in @grabbase.
# NOTE(review): GrabBase is defined outside this file view; none of the
# methods visible here read @grabbase — confirm whether it is still needed.
def initialize
  @grabbase = GrabBase.new
end

Instance Attribute Details

#channel ⇒ `Object` (readonly)

图片的获取： Net::HTTP.get(url) 图片的文件类型获取：



17
18
19

# File 'lib/grab_tvmao.rb', line 17

# Read-only accessor for @channel, the array that #getchannels resets and
# fills with one { channel_id => { name:, herf:, type: } } hash per channel.
def channel
  @channel
end

#img_down_path ⇒ `Object` (readonly)

图片下载路径存放



21
22
23

# File 'lib/grab_tvmao.rb', line 21

# Read-only accessor for @img_down_path (documented as the image download path).
# NOTE(review): the visible code only ever assigns @img_down_dir_path (in
# #getchannels), never @img_down_path — confirm which name is intended.
def img_down_path
  @img_down_path
end

#proxyindex ⇒ `Object` (readonly)

代理的索引



19
20
21

# File 'lib/grab_tvmao.rb', line 19

# Read-only accessor for @proxyindex, the position in the proxy list that
# #get_doc_with_proxy uses for its next request.
def proxyindex
  @proxyindex
end

#show_schedule ⇒ `Object` (readonly)

根据节目的时间表



20
21
22

# File 'lib/grab_tvmao.rb', line 20

# Read-only accessor for @show_schedule, a hash keyed by show name whose
# values are the air-time lists returned by #get_show_schedule.
def show_schedule
  @show_schedule
end

#site ⇒ `Object` (readonly)

网站地址



18
19
20

# File 'lib/grab_tvmao.rb', line 18

# Read-only accessor for @site, the base URL of the scraped site
# (#getchannels sets it to DEFAULT_SITE).
def site
  @site
end

Instance Method Details

#conversion_what_day(whatday) ⇒ `Object`

将星期的 wday 数值转化为中文星期名 (converts a wday number to its Chinese weekday name)

# File 'lib/grab_tvmao.rb', line 137

# Converts a numeric weekday into its Chinese name, e.g. 1 => "星期一".
#
# Sunday is accepted in both numbering schemes: 0 (what Time#wday returns)
# and 7. Both produce "星期日", the spelling that #get_show_type_url parses
# back into a weekday number. Previously 0 produced a bare "星期" and 7
# produced "星期七".
#
# @param whatday [#to_i] weekday number, 0..7
# @return [String] the weekday name, or the bare prefix "星期" when the
#   number is outside 0..7
def conversion_what_day(whatday)
  suffixes = ["日", "一", "二", "三", "四", "五", "六", "日"]
  index = whatday.to_i
  return "星期" unless index >= 0 && index < suffixes.size
  "星期" + suffixes[index]
end

#del_day_num(day_num) ⇒ `Object`

前几天需要减去的num

# File 'lib/grab_tvmao.rb', line 177

# Converts a number of days into the equivalent number of seconds
# (days * 60 * 60 * 24), suitable for subtracting from a Time.
def del_day_num(day_num)
  day_num * 60 * 60 * 24
end

#del_time_list ⇒ `Object`

前面一周要删除的日期的列表

# File 'lib/grab_tvmao.rb', line 190

# Lists the formatted dates of today and the seven days before it, but only
# when today is Monday (wday == 1); on any other day the list is empty.
#
# @return [Array] results of #get_time_day_prior for offsets 0..7, or []
def del_time_list
  return [] unless Time.now.wday == 1

  (0..7).map { |days_back| get_time_day_prior(days_back) }
end

#dispose_time(num) ⇒ `Object`

如果时间为1~9的一位则为其在数字前加0补齐二位

# File 'lib/grab_tvmao.rb', line 159

# Renders a number as text, prefixing a single "0" when the text is shorter
# than two characters (so 5 becomes "05"; an empty string becomes "0").
#
# @param num [#to_s] value to render
# @return [String]
def dispose_time(num)
  text = num.to_s
  text.length < 2 ? "0" + text : text
end

#err_doc_proxy(proxy, proxylist, url = "", err = "") ⇒ `Object`

# File 'lib/grab_tvmao.rb', line 365

# Handles a failed fetch: moves on to the next proxy and retries through
# #get_doc_with_proxy, giving up after the fourth consecutive failure.
#
# Fixes over the previous version:
# * the blank-proxy check tested `empty?` before `nil?`, so a nil proxy
#   raised NoMethodError instead of being handled;
# * `proxylist.delete_at[@proxyindex]` called delete_at without an argument
#   (ArgumentError); the entry at @proxyindex is now actually removed.
#
# @param proxy [String, nil] proxy that was in use when the failure happened
# @param proxylist [Array<String>] proxy list; a blank entry is removed in place
# @param url [String] URL being fetched
# @param err [String] description of the failure, used for logging/raising
# @return [Object] the document returned by the retry
# @raise [RuntimeError] once four consecutive attempts have failed
def err_doc_proxy(proxy,proxylist,url="",err="")
  if proxy.nil? || proxy.empty?
    # Drop the unusable (blank) entry so it is not picked again.
    proxylist.delete_at(@proxyindex) if proxylist && @proxyindex
  end

  # @no_firest counts consecutive failures; it is reset elsewhere on success.
  @no_firest ||= 0
  @no_firest += 1
  p "*************************Proxy:#{proxy}, url:#{url} Error:#{err}"
  # proxylist.delete(proxy)  # would drop the failing proxy, but the page itself may be at fault
  @proxyindex += 1
  @proxyindex = @proxyindex % @size

  if @no_firest < 4
    get_doc_with_proxy(proxylist,url)
  else
    @no_firest = 0
    raise RuntimeError,"Error: #{err}"
  end
end

#get_assign_date_url(url, start_time, day_num) ⇒ `Object`

获取指定时间段的url列表：start_time 为int型，表示开始时间与今天的差值（正数代表今天之后的第几天，负数代表今天之前的第几天）；day_num 为int型，代表从开始时间算起要抓取的天数

# File 'lib/grab_tvmao.rb', line 487

# Builds the per-day schedule URLs for a window of days relative to today.
#
# The first two "-"-separated segments of +url+ are kept and a "w<N>.html"
# suffix is appended for every day number in the window. Day numbers are
# based on Time.now.wday: the window starts at wday + start_time (replaced
# by 1 when that is negative) and spans day_num days, but never extends
# past wday + 7.
#
# @param url [String] channel path such as "/program/CCTV-CCTV1-w1.html"
# @param start_time [Integer] offset in days from today (negative = earlier)
# @param day_num [Integer] number of days to cover
# @return [Array<String>] absolute URLs, one per day
def get_assign_date_url(url,start_time,day_num)
  base = @site || "http://www.tvmao.com"

  today = Time.now.wday
  first_day = today + start_time
  first_day = 1 if first_day < 0
  last_day = [first_day + day_num - 1, today + 7].min

  segments = url.split("-")
  prefix = "#{base}#{segments[0]}-#{segments[1]}-"

  (first_day..last_day).map { |day| "#{prefix}w#{day}.html" }
end

#get_doc_with_proxy(proxylist, url) ⇒ `Object`

使用代理获取url的html的doc值

# File 'lib/grab_tvmao.rb', line 390

# Fetches +url+ and parses it into a Nokogiri HTML document.
#
# With a non-empty +proxylist+ the request goes through the proxy at
# @proxyindex, and the index is advanced afterwards so successive calls
# rotate through the list. Failures are delegated to #err_doc_proxy, which
# calls back into this method for the retry. With a nil or empty list the
# URL is fetched directly.
#
# NOTE(review): `open(url, ...)` is called with a URL, which presumably
# relies on open-uri being loaded elsewhere in the file — confirm.
#
# @param proxylist [Array<String>, nil] candidate proxies
# @param url [String] page to fetch
# @return [Object] the parsed document
# @raise [RuntimeError] when the direct fetch fails, or when #err_doc_proxy
#   gives up
def get_doc_with_proxy(proxylist,url)
  unless proxylist.nil?||proxylist.empty?
    unless @proxyindex
      @proxyindex = 0
    end
    # @size is read by err_doc_proxy when it wraps the index.
    @size = proxylist.size
    @proxyindex=@proxyindex%proxylist.size
    # Fall back to the following entry when the current slot holds nil/false.
    if(proxylist[@proxyindex])
      proxy = proxylist[@proxyindex]
    else
      proxy = proxylist[@proxyindex+1]
    end
    begin
      # A blank proxy skips the fetch, leaving doc nil for the branch below.
      doc = Nokogiri::HTML(open(url,:proxy=>"#{proxy}").read) unless proxy.nil?||proxy.empty?
      if doc.nil?
        p "DOC is nil"
        # NOTE(review): a raise from err_doc_proxy here is caught by the
        # rescue below, which then calls err_doc_proxy again.
        doc=err_doc_proxy(proxy,proxylist,url,"doc nil")
        @no_firest=0
      end
      # Success: reset the consecutive-failure counter used by err_doc_proxy.
      @no_firest = 0
    rescue => err
      p "IN Rescue"
      doc=err_doc_proxy(proxy,proxylist,url,err.to_s)
      @no_firest=0
      p "Get DOC"
      # NOTE(review): err_doc_proxy has already advanced @proxyindex, so this
      # advances it a second time — confirm this is intended.
      @proxyindex += 1
      @proxyindex=@proxyindex%@size
      return doc
    end
    @proxyindex += 1
    @proxyindex=@proxyindex%@size
  else
    begin
      # `proxy` is never assigned on this branch, so it is nil here and the
      # guard is always true.
      doc = Nokogiri::HTML(open(url).read) if proxy.nil?||proxy.empty?
    rescue => err
      p "Error : Proxy:#{proxy}, url:#{url}"
      raise RuntimeError,"Error: #{err.to_s} Method:get_doc_with_proxy"
    end
  end
  doc
end

#get_proxy_list ⇒ `Object`

获取代理列表

# File 'lib/grab_tvmao.rb', line 706

# Scrapes a public proxy listing and returns the proxies as "ip:port" strings.
#
# The "30 fastest" page is tried first and the general HTTP page is used when
# it yields nothing. Entries on port 3128 are skipped, as before.
#
# Fix: rows where the ip or the port cannot be extracted used to produce
# bogus entries such as ":80" or "1.2.3.4:" (a nil ip passed the `ip != ""`
# check). Such rows are now skipped.
#
# @return [Array<String>] proxies in "ip:port" form
def get_proxy_list()
  list = gg('http://www.proxycn.cn/html_proxy/30fastproxy-1.html')
  if list.count == 0
    list = gg('http://www.proxycn.cn/html_proxy/http-1.html')
  end

  regex_port = /(?<=<TD class="list">)[0-9]*?(?=<\/TD>)/
  regex_ip   = /(?<=a href\=whois.php\?whois\=)[0-9,.]*/

  ips_ports = []
  list.each do |proxy_txt|
    port = proxy_txt[regex_port].to_s
    ip = proxy_txt[regex_ip].to_s
    # Skip rows with a missing ip/port as well as the excluded port 3128.
    next if ip.empty? || port.empty? || port == '3128'
    ips_ports << "#{ip}:#{port}"
  end
  p "Count: #{ips_ports.count}"
  ips_ports
end

#get_schedulelist_atday(channel, url, proxylist) ⇒ `Object`

获取某天的节目表

# File 'lib/grab_tvmao.rb', line 433

# Scrapes one day's schedule page for a channel.
#
# Side effect: sets @date to "<week>(<date>)" taken from the page header;
# callers use it as the key for the returned list.
#
# Fix: the show type was kept in a variable shared by all rows, so a row
# without a detail link silently inherited the type of the previous row.
# Type, name and image are now reset for every row.
#
# @param channel [String] channel id (used for the default logo URL)
# @param url [String] URL of the day's schedule page
# @param proxylist [Array<String>, nil] proxies passed to #get_doc_with_proxy
# @return [Array<Hash>] one hash per programme with the keys
#   "schedule_name", "schedule_logo", "schedule_start",
#   "show_infomation_herf", "type" and "name"
def get_schedulelist_atday(channel,url,proxylist)
  p "Grab: #{url}"
  doc = get_doc_with_proxy(proxylist,url)

  # Default logo URL derived from the channel id; replaced below when the
  # page header carries its own image.
  img_url = "http://static.haotv.me/channel/logo/" + channel + ".jpg"

  data = doc.css('div[class="mt10 clear"]')[0].content.split(" ")
  date = data[0]
  week = data[1]
  p "Channel: #{channel}  Date: #{date} Week: #{week}"
  @date = "#{week}(#{date})"
  schedule_list = []

  logo = doc.css("h1[style='float:left']").xpath('img[@src]')[0]
  img_url = logo.get_attribute("src") if logo

  p "**************IMG: #{img_url}"

  doc.css('ul[id="pgrow"]')[0].css("li").each do |row|
    link = row.xpath('a[@href]')[0]
    if link
      schedule_herf = link.get_attribute("href")
    else
      drama = row.css('a[class="drama"]')[0]
      # Drama rows link to an episode section; keep only the show's own path.
      schedule_herf = drama.get_attribute("href").gsub("/episode/section","#%#").split("#%#")[0] if drama
    end

    fields = row.content.split(" ")
    next unless fields.size > 1

    time = fields[0]
    title = fields[1]
    # Reset per row so nothing leaks from the previous programme.
    show_type = []
    show_name = ""
    show_img = nil
    unless schedule_herf.nil? || schedule_herf.empty?
      p "Show_infomation:#{schedule_herf} Time:#{time}"
      show_infomation = get_show_infomation(proxylist,schedule_herf)
      show_type = show_infomation["type"]
      show_name = show_infomation["name"]
      show_img = show_infomation["img"]
    end
    p "Time: #{time} schedule: #{title} show_infomation_herf: #{schedule_herf}  type: #{show_type} name: #{show_name} img:#{show_img}"
    schedule_list << {"schedule_name"=>title,"schedule_logo"=>show_img,"schedule_start"=>time,"show_infomation_herf"=>schedule_herf,"type"=>show_type,"name"=>show_name}
  end
  schedule_list
end

#get_show_infomation(proxy_list, schedule_herf) ⇒ `Object`

获取节目详细信息

# File 'lib/grab_tvmao.rb', line 601

# Collects the details of one show from its page and its "/detail" page.
#
# Side effects: resets @proxyindex to 0, defaults @site when it is unset,
# and caches the show's air times in @show_schedule (keyed by show name)
# the first time a name is seen.
#
# @param proxy_list [Array<String>, nil] proxies passed to #get_doc_with_proxy
# @param schedule_herf [String] site-relative path of the show page
# @return [Hash] "type" (unique genre names), "name" and "img" (poster URL
#   or nil)
def get_show_infomation(proxy_list,schedule_herf)
  @proxyindex = 0
  @site ||= "http://www.tvmao.com"
  schedule_herf = @site + schedule_herf

  doc = get_doc_with_proxy(proxy_list,schedule_herf)
  name = doc.css('span[itemprop="name"]')[0].content

  # Poster image of the show, when the page has one.
  posters = doc.css('img[class="tvc"]')
  schedule_img_down_path = posters[0].get_attribute('src') if posters && posters[0]

  type = []
  ['span[itemprop="genre"]', 'a[itemprop="genre"]'].each do |selector|
    doc.css(selector).each { |genre| type << genre.content }
  end

  detail = get_doc_with_proxy(proxy_list,"#{schedule_herf}/detail")
  detail.css('span[itemprop="genre"]').each { |genre| type << genre.content } if detail
  type.uniq!

  @show_schedule ||= {}
  unless @show_schedule.has_key?(name)
    @show_schedule[name] = get_show_schedule(proxy_list,schedule_herf)
  end

  {"type"=>type,"name"=>name,"img"=>schedule_img_down_path}
end

#get_show_schedule(proxylist, herf) ⇒ `Object`

获取节目的时间表

# File 'lib/grab_tvmao.rb', line 644

# Reads a show's "/playingtime" page and lists when and where it airs.
#
# The first row of the EPG block is skipped; every following row
# contributes one entry.
#
# @param proxylist [Array<String>, nil] proxies passed to #get_doc_with_proxy
# @param herf [String] absolute URL of the show page
# @return [Array<Hash>] hashes with the keys "week", "date", "time",
#   "channel_name" and "show_name"; empty when the page has no EPG block
def get_show_schedule(proxylist,herf)
  doc = get_doc_with_proxy(proxylist,herf + "/playingtime")
  epg = doc.css('div[id="epg"]')[0]
  return [] unless epg

  epg.css("div[class='c1 col']").drop(1).map do |row|
    when_text = row.css('div[class="f1 fld"]')[0].content
    channel_name = row.css('div[class="f2 fld"]')[0].content
    show_name = row.css('div[class="f3 fld"]')[0].content
    week, date, clock = when_text.split(" ")
    {"week"=>week,"date"=>date,"time"=>clock,"channel_name"=>channel_name,"show_name"=>show_name}
  end
end

#get_show_type(channel, url, date, time, proxylist) ⇒ `Object`

从tvmao获取指定开始时间的节目类型：channel 节目表所属的频道；url 节目表的网络地址；date 日期；time 节目开始时间；proxylist 代理列表

# File 'lib/grab_tvmao.rb', line 79

# Looks up the type of the programme that starts at +time+ on the given day.
#
# The day's schedule is scraped via #get_schedulelist_atday and start times
# are compared numerically with the ":" removed. When several programmes
# share the start time, the last one wins.
#
# @param channel [String] channel the schedule belongs to
# @param url [String] channel schedule path
# @param date [String] date label such as "星期一(01-02)"
# @param time [String] start time such as "19:30"
# @param proxylist [Array<String>, nil] proxies used for fetching
# @return [Object] the matching programme's "type" value, or [] when there
#   is no match or the match has no type
def get_show_type(channel,url,date,time,proxylist)
  day_url = get_show_type_url(url,date)
  wanted = time.gsub(":","").to_i

  hit = get_schedulelist_atday(channel,day_url,proxylist).select do |entry|
    entry["schedule_start"].gsub(":","").to_i == wanted
  end.last

  found = hit ? hit["type"] : nil
  found ? found : []
end

#get_show_type_by_batch(channel, url, date, schedule, proxylist) ⇒ `Object`

批量从tvmao获取节目类型：channel 节目表所属的频道；url 节目表的网络地址；date 日期；schedule 需要批量修改的时间表；proxylist 代理列表

# File 'lib/grab_tvmao.rb', line 41

# Enriches an existing schedule with the types (and missing logos) scraped
# from the site for the same day.
#
# Entries are matched by start time (compared numerically with the ":"
# removed). The hashes in +schedule+ are updated in place and also returned.
#
# Fixes over the previous version:
# * the logo was read from the key "img", which rows produced by
#   #get_schedulelist_atday do not have (they use "schedule_logo"), so an
#   empty logo was overwritten with nil instead of being filled in;
# * the type union raised when either side had no "type"; missing types are
#   now treated as empty lists.
#
# @param channel [String] channel the schedule belongs to
# @param url [String] channel schedule path
# @param date [String] date label such as "星期一(01-02)"
# @param schedule [Array<Hash>] entries with at least "schedule_start"
# @param proxylist [Array<String>, nil] proxies used for fetching
# @return [Array<Hash>] the (mutated) entries, one per distinct start time
def get_show_type_by_batch(channel,url,date,schedule,proxylist)
  by_start = {}
  schedule.each do |entry|
    by_start[entry["schedule_start"].gsub(":","").to_i] = entry
  end

  url = get_show_type_url(url,date)
  get_schedulelist_atday(channel,url,proxylist).each do |grabbed|
    target = by_start[grabbed["schedule_start"].gsub(":","").to_i]
    next unless target

    target["type"] = Array(target["type"]) | Array(grabbed["type"])
    p "*****************************************************************************************"
    p "Schedule: #{target}"
    p "schedule_logo_1: #{target["schedule_logo"]}"
    p "schedule_logo_2: #{target[:schedule_logo]}"

    # Only fill in a logo when ours is missing and the grabbed row has one.
    grabbed_logo = grabbed["schedule_logo"]
    if target["schedule_logo"].to_s.empty? && !grabbed_logo.to_s.empty?
      target["schedule_logo"] = grabbed_logo
    end
  end

  by_start.values
end

#get_show_type_url(url, date) ⇒ `Object`

# File 'lib/grab_tvmao.rb', line 97

# Builds the schedule URL for the weekday named in +date+.
#
# The text before the first "(" in +date+ is mapped from a Chinese weekday
# name ("星期一".."星期日") to 1..7; anything else maps to 0. The first two
# "-"-separated segments of +url+ are kept and "w<N>.html" is appended.
#
# @param url [String] channel path such as "/program/CCTV-CCTV1-w1.html"
# @param date [String] label such as "星期三(01-02)"
# @return [String] absolute URL on http://www.tvmao.com
def get_show_type_url(url,date)
  weekday_numbers = {
    "星期一" => 1,
    "星期二" => 2,
    "星期三" => 3,
    "星期四" => 4,
    "星期五" => 5,
    "星期六" => 6,
    "星期日" => 7
  }
  whatday = weekday_numbers.fetch(date.split("(")[0], 0)

  segments = url.split("-")
  "http://www.tvmao.com#{segments[0]}-#{segments[1]}-w#{whatday}.html"
end

#get_time_day_prior(num) ⇒ `Object`

获取距离当前多少天的之前的日期

# File 'lib/grab_tvmao.rb', line 183

# Formats the date that lies +num+ days before the current moment.
#
# @param num [Numeric] how many days to go back
# @return [Object] whatever #get_week_date_time returns for that moment
def get_time_day_prior(num)
  get_week_date_time(Time.now - del_day_num(num))
end

#get_topfast_list(use_time) ⇒ `Object`

获取指定访问速度的代理服务器 time为最慢速度的时间 int型代表秒

# File 'lib/grab_tvmao.rb', line 672

# Probes every proxy from #get_proxy_list and keeps those that can load the
# site's programme page within +use_time+ seconds.
#
# Fixes over the previous version:
# * elapsed time was measured in whole seconds and a proxy was kept only
#   when it was > 0, so any proxy answering within the same second was
#   rejected; a proxy now qualifies simply by finishing without error
#   inside the timeout;
# * the extra hard-coded 8 second ceiling is gone — +use_time+ (already
#   enforced by the timeout) is the only limit;
# * `rescue Exception` also swallowed interrupts and exit requests; only
#   timeouts and StandardError are rescued now;
# * `Timeout.timeout` replaces the bare `timeout` call, which newer Ruby
#   versions no longer provide.
#
# @param use_time [Numeric] maximum time in seconds allowed per proxy
# @return [Array<String>] the qualifying proxies in "ip:port" form
def get_topfast_list(use_time)
  fast_list = []
  get_proxy_list().each do |ip_port|
    proxy_url = "http://#{ip_port}"
    started = Time.now.to_i
    begin
      Timeout.timeout(use_time) do
        Nokogiri::HTML(open("http://www.tvmao.com/program",:proxy=> proxy_url))
      end
      time_use = Time.now.to_i - started
      p "#{proxy_url}   use_time:#{time_use}"
      fast_list << ip_port
    rescue Timeout::Error, StandardError => e
      case e
      when Errno::ETIMEDOUT, Timeout::Error
        p "Use #{proxy_url} timeout"
      when Errno::ECONNREFUSED
        p "Use #{proxy_url} Error connection"
      else
        p "Use #{proxy_url} Error:#{e.to_s}"
      end
    end
  end
  fast_list
end

#get_week_date_time(time) ⇒ `Object`

转化当前时间的格式

# File 'lib/grab_tvmao.rb', line 168

# Formats a time as "<weekday name>(<MM>-<DD>)", using #conversion_what_day
# for the weekday and #dispose_time to zero-pad the month and day.
#
# @param time [Time] moment to format
# @return [String]
def get_week_date_time(time)
  weekday_name = conversion_what_day(time.wday)
  "#{weekday_name}(#{dispose_time(time.month)}-#{dispose_time(time.day)})"
end

#getchannels(img_dir_path) ⇒ `Object`

获取网站的频道表 img_path 图片存放路径

# File 'lib/grab_tvmao.rb', line 320

# Scrapes the channel tables for every type in DEFAULT_GrabtvType.
#
# Side effects: resets @channel, @site (to DEFAULT_SITE) and @proxyindex,
# remembers +img_dir_path+ in @img_down_dir_path, and (re)writes the file
# "channel_img_down_path" inside that directory with one
# "<channel_id>:<logo url>" line per channel.
#
# Fix: the output file is now opened with the block form of File.open, so
# the handle is closed even when fetching or parsing a page raises
# (previously it stayed open).
#
# @param img_dir_path [String] directory that receives the logo list file
# @return [Hash] "channel_info" (channel id => details hash) and
#   "channel_urls" (channel id => schedule path)
def getchannels(img_dir_path)
  @channel = []
  @site = DEFAULT_SITE
  @proxyindex = 0
  @img_down_dir_path = img_dir_path

  channel_urls = {}
  channel_info = {}

  get_url = lambda { |type|
    @site + "/program/duration/#{type}/w1.html" unless (type.nil? || type.empty?)
  }

  # A path like "/program/CCTV-CCTV1-w1.html" yields "CCTV1".
  get_channel_id = lambda { |url|
    url.split("/")[2].split("-")[1] unless (url.nil? || url.empty?)
  }

  File.open(File.join(img_dir_path,"channel_img_down_path"),'w+') do |file|
    @img_down_file = file
    DEFAULT_GrabtvType.each do |type|
      url = get_url.call(type)
      p url
      doc = Nokogiri::HTML(open(url))
      p doc.content
      p "*************************************************************"
      doc.css('td[class="tdchn"]').each do |td|
        channel_name = td.content
        # The last link in the cell wins; "" when the cell has no link.
        herf = ""
        td.css('a').each do |a|
          herf = a['href']
        end
        channel_id = get_channel_id.call(herf)

        # URL of the channel's logo image.
        img_path = "http://static.haotv.me/channel/logo/#{channel_id}.jpg"
        @img_down_file.puts("#{channel_id}:#{img_path}")
        @channel << ({channel_id=>{name:channel_name,herf:herf,type:type}})
        channel_info.merge!({channel_id=>{"channel_name"=>channel_name,"channel_type"=>type,"channel_id"=>channel_id,"img_path"=>img_path}})
        channel_urls.merge!({channel_id=>herf})
      end
    end
  end

  p "Channel: #{@channel}"
  {"channel_info"=>channel_info,"channel_urls"=>channel_urls}
end

#getschedule(channel, herf, proxylist, day_num = 7, img_dir_down_path = @img_down_dir_path) ⇒ `Object`

因原已调用所以保留获取一周节目表

# File 'lib/grab_tvmao.rb', line 557

# Grabs the schedule for days 1..day_num of a channel's week (kept because
# existing callers still use it).
#
# Side effects: resets @show_schedule, sets @date for every day that is
# scraped, and (re)creates the file "schedule_img_down_path" in the image
# directory.
#
# Fixes over the previous version:
# * the fallback directory was `__FILE__` itself, so the output path pointed
#   *below a file* and could never be opened; the directory containing this
#   file is used instead;
# * the file is opened with the block form of File.open so the handle is
#   closed even when scraping raises.
#
# @param channel [String] channel id
# @param herf [String] channel schedule path such as "/program/CCTV-CCTV1-w1.html"
# @param proxylist [Array<String>, nil] proxies used for fetching
# @param day_num [Integer] number of days to grab; values below 1 (or values
#   that cannot be compared with 1) fall back to 1
# @param img_dir_down_path [String, nil] directory for the image list file
# @return [Hash] "channel_schedule" (date label => programme list) and
#   "show_schedule" (show name => air times)
def getschedule(channel,herf,proxylist,day_num=7,img_dir_down_path=@img_down_dir_path)
  p "Day Num is #{day_num}"
  begin
    day_num = 1 if day_num<1
  rescue
    # nil or otherwise non-comparable input: fall back to a single day.
    day_num = 1
  end

  img_dir_down_path ||= File.dirname(__FILE__)
  site = @site || "http://www.tvmao.com"
  @show_schedule = {}

  # Keep the first two "-" separated segments and append "w<day>.html".
  segments = herf.split("-")
  prefix = "#{site}#{segments[0]}-#{segments[1]}-"

  channel_schedule = {}
  File.open(File.join(img_dir_down_path,"schedule_img_down_path"),"w+") do |file|
    @img_down_file = file
    1.upto(day_num) do |day|
      # get_schedulelist_atday fills @date in; an empty value means "skip".
      @date = ""
      schedule_list = get_schedulelist_atday(channel,"#{prefix}w#{day}.html",proxylist)
      channel_schedule[@date] = schedule_list unless @date.empty?
    end
  end

  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
end

#getScheduleAssignDate(channel, herf, proxylist, start_num, day_num = 0, img_dir_down_path = @img_down_dir_path) ⇒ `Object`

获取指定时间段的节目表

# File 'lib/grab_tvmao.rb', line 523

# Grabs a channel's schedule for a window of days relative to today.
#
# The URLs to scrape come from #get_assign_date_url(herf, start_num, day_num).
# Side effects: resets @show_schedule, sets @date for every day that is
# scraped, and (re)creates the file "schedule_img_down_path" in the image
# directory.
#
# Fixes over the previous version:
# * the fallback directory was `__FILE__` itself, so the output path pointed
#   *below a file* and could never be opened; the directory containing this
#   file is used instead;
# * the file is opened with the block form of File.open so the handle is
#   closed even when scraping raises.
#
# @param channel [String] channel id
# @param herf [String] channel schedule path
# @param proxylist [Array<String>, nil] proxies used for fetching
# @param start_num [Integer] first day as an offset from today (negative =
#   earlier)
# @param day_num [Integer] number of days to grab; values below 1 (or values
#   that cannot be compared with 1) fall back to 1
# @param img_dir_down_path [String, nil] directory for the image list file
# @return [Hash] "channel_schedule" (date label => programme list) and
#   "show_schedule" (show name => air times)
def getScheduleAssignDate(channel,herf,proxylist,start_num,day_num=0,img_dir_down_path=@img_down_dir_path)
  begin
    day_num = 1 if day_num<1
  rescue
    # nil or otherwise non-comparable input: fall back to a single day.
    day_num = 1
  end

  img_dir_down_path ||= File.dirname(__FILE__)
  @show_schedule = {}

  channel_schedule = {}
  File.open(File.join(img_dir_down_path,"schedule_img_down_path"),"w+") do |file|
    @img_down_file = file
    get_assign_date_url(herf,start_num,day_num).each do |url|
      # get_schedulelist_atday fills @date in; an empty value means "skip".
      @date = ""
      schedule_list = get_schedulelist_atday(channel,url,proxylist)
      channel_schedule[@date] = schedule_list unless @date.empty?
    end
  end

  {"channel_schedule"=>channel_schedule,"show_schedule"=>@show_schedule}
end

#gg(url) ⇒ `Object`

# File 'lib/grab_tvmao.rb', line 726

# Downloads +url+ and returns every '<TD class="list">...</TD>' run found in
# it. Matching is per line, because "." does not cross newlines, and greedy,
# so several cells on one line come back as a single string.
#
# @param url [String] address of the proxy listing page
# @return [Array<String>] the matched fragments
def gg(url)
  pieces = []
  URI.parse(url).open do |stream|
    stream.each_line { |line| pieces << line << "\n" }
  end
  pieces.join.scan(/<TD class="list">.*<\/TD>/)
end

#save_img ⇒ `Object`



736
737
738

# File 'lib/grab_tvmao.rb', line 736

# Placeholder: the body is empty, so calling it does nothing and returns nil.
# NOTE(review): presumably meant to download channel/show images — confirm
# whether it should be implemented or removed.
def save_img

end

#start ⇒ `Object`

调用此方法的例子

# File 'lib/grab_tvmao.rb', line 206

# Example of how the grabber is meant to be driven: list the channels, find
# usable proxies, then grab today's schedule for every channel from
# "CCTV16" onwards (in the iteration order of the channel list).
#
# Fix: the helpers were invoked on the class (GrabTvmao.getchannels,
# GrabTvmao.get_topfast_list, ...) although they are instance methods,
# which raises NoMethodError; they are now called on the current instance.
# Locals that were assigned but never read have been dropped.
#
# NOTE(review): the image directory below is a hard-coded developer path.
#
# @return [Hash] the "channel_urls" hash that was iterated
def start
  # Returns the longest run of characters shared by str1 and str2 (a rough
  # similarity measure). NOTE(review): this lambda is never invoked in this
  # method.
  get_similarity_string = lambda { |str1,str2|
    _length = 0
    type = 0
    if str1.length>str2.length
      _length=str2.length
      type = 2
    else
      _length=str1.length
      type =1
    end
    _str_list = []
    _str = ""
    for i in 0.._length
      case type
        when 2
          n=i
          0.upto(str1.length-1).each do |j|
            p "N: #{n}"
            if(str2[n]==str1[j])
              _str =_str+str2[n]
              n = n+1
              p "Str = #{_str}"
            else
              _str_list << _str
              _str = ""
            end
          end
        when 1
          n=i
          0.upto(str2.length-1).each do |j|
            p "N: #{n}"
            if(str1[n]==str2[j])
              _str =_str+str1[n]
              n=n+1
              p "Str = #{_str}"
            else
              _str_list << _str
              _str = ""
            end
          end
      end
    end
    p _str_list
    _str = ""
    _str_list.each do |str|
      if _str.length<str.length
        _str=str
      end
    end
    _str
  }

  path = "/home/zql/workspace/New/smart_remote/img_path"
  channel_list = getchannels(path)
  channel_urls = channel_list['channel_urls']
  p "Channel img save file,path='#{img_down_path}'"
  # The argument is the slowest acceptable response time, in seconds.
  proxy_list = get_topfast_list(5)

  # Used for testing.
  p "************************************"
  p "proxy_list:#{proxy_list}"
  p "************************************"

  bool_start = false

  channel_urls.each do |channel,url|
    # Skip every channel that comes before CCTV16.
    bool_start = true if channel=="CCTV16"
    next unless bool_start

    # getScheduleAssignDate arguments:
    #   channel            channel id
    #   herf               channel schedule path
    #   proxylist          proxy list
    #   start_num          first day as an offset from today (positive =
    #                      days after today, negative = days before)
    #   day_num            number of days to grab
    #   img_dir_down_path  directory for the image list (optional)
    start_time = 0
    use_num = 1
    # With these values: one day of data, starting today.
    getScheduleAssignDate(channel,url,proxy_list,start_time,use_num)
  end
end

Class: Grabepg::GrabTvmao

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ GrabTvmao

Instance Attribute Details

#channel ⇒ Object (readonly)

#img_down_path ⇒ Object (readonly)

#proxyindex ⇒ Object (readonly)

#show_schedule ⇒ Object (readonly)

#site ⇒ Object (readonly)

Instance Method Details

#conversion_what_day(whatday) ⇒ Object

#del_day_num(day_num) ⇒ Object

#del_time_list ⇒ Object

#dispose_time(num) ⇒ Object

#err_doc_proxy(proxy, proxylist, url = "", err = "") ⇒ Object

#get_assign_date_url(url, start_time, day_num) ⇒ Object

#get_doc_with_proxy(proxylist, url) ⇒ Object

#get_proxy_list ⇒ Object

#get_schedulelist_atday(channel, url, proxylist) ⇒ Object

#get_show_infomation(proxy_list, schedule_herf) ⇒ Object

#get_show_schedule(proxylist, herf) ⇒ Object

#get_show_type(channel, url, date, time, proxylist) ⇒ Object

#get_show_type_by_batch(channel, url, date, schedule, proxylist) ⇒ Object

#get_show_type_url(url, date) ⇒ Object

#get_time_day_prior(num) ⇒ Object

#get_topfast_list(use_time) ⇒ Object

#get_week_date_time(time) ⇒ Object

#getchannels(img_dir_path) ⇒ Object

#getschedule(channel, herf, proxylist, day_num = 7, img_dir_down_path = @img_down_dir_path) ⇒ Object

#getScheduleAssignDate(channel, herf, proxylist, start_num, day_num = 0, img_dir_down_path = @img_down_dir_path) ⇒ Object

#gg(url) ⇒ Object

#save_img ⇒ Object

#start ⇒ Object