Class: GoogleVideo::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/google-video.rb

Overview

The main client object providing interface methods for retrieving information from the Google Video server.

Constant Summary collapse

@@DEFAULT_HOST =

The default hostname queried to retrieve Google Video content.

'video.google.com'
@@DEFAULT_AGENT =

The default user agent submitted with HTTP requests to Google Video.

'google-video for Ruby (http://www.rubyforge.org/projects/google-video/)'

Instance Method Summary collapse

Constructor Details

#initialize(params = nil) ⇒ Client

Constructs a Client for querying the Google Video server. Optional parameters to be specified as a hash include:

  • host: optional alternate host name to query instead of the default host.

  • agent: optional alternate user agent to submit with http requests instead of the default agent.



446
447
448
449
450
# File 'lib/google-video.rb', line 446

# Builds a Client for querying the Google Video server.
#
# The host and user agent start out as the class-level defaults.  When a
# params hash is supplied, every key/value pair in it is then stored in the
# instance variable named after the key (so :host sets @host and :agent
# sets @agent), overriding the defaults.
def initialize(params = nil)
  @host = @@DEFAULT_HOST
  @agent = @@DEFAULT_AGENT
  return unless params

  params.each do |name, setting|
    instance_variable_set('@' + name.to_s, setting)
  end
end

Instance Method Details

#top_videos(top_request) ⇒ Object

Looks up top videos on Google Video with the parameters specified in the supplied TopVideosRequest and returns a TopVideosResponse.



663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
# File 'lib/google-video.rb', line 663

# Fetches the Google Video top videos listing described by the supplied
# TopVideosRequest and returns a TopVideosResponse holding the request url
# and one TopVideo entry per data row of the 'table-top100' table.
#
# Raises ArgumentError if top_request is not a TopVideosRequest.
def top_videos(top_request)
  # reject anything that is not a proper request object
  unless top_request.is_a?(TopVideosRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::TopVideosRequest"
  end

  # fetch the listing page and parse it
  url = _top_videos_url(top_request)
  doc = Hpricot(_request(url))

  # one table row per video; the leading row only holds column headers
  rows = doc / "table[@class='table-top100']/tr"
  rows.shift

  entries = rows.map do |row|
    # give the five cells of the row meaningful names
    cell_movement, cell_today, cell_yesterday, cell_thumbnail, cell_detail = (row / "td")

    # rank movement: 1 for an 'up' arrow image, -1 for 'down', 0 otherwise
    arrow_html = (cell_movement / "img").to_html
    movement = case arrow_html
               when /up\.gif/ then 1
               when /down\.gif/ then -1
               else 0
               end

    # today's and yesterday's rank
    rank_today = cell_today.inner_html.to_i
    rank_yesterday = cell_yesterday.inner_html.to_i

    # thumbnail image
    thumbnail_img = cell_thumbnail % "a/img"
    thumbnail_image_url = _decode_html(thumbnail_img['src'])

    # the first link in the detail cell carries both page url and title
    link = cell_detail % "a"
    page_url = "http://#{@host}#{link['href']}"
    title = _decode_html(link.inner_html.strip)

    # stars
    star_count = _parse_star_elements(cell_detail / "div[@class='meta']/span/font/img[@class='star']")

    # rating count, zero when the page shows no rater count
    raters = cell_detail % "div[@class='meta']/span/font/span[@id='numOfRaters']"
    rating_count = raters ? raters.inner_html.to_i : 0

    # duration
    meta = cell_detail % "div[@class='meta']"
    duration = meta.all_text.gsub(/ /, '').strip

    # description is the complete text of the detail cell
    description = _decode_html(cell_detail.all_text.strip)

    video = Video.new(:title => title,
                      :page_url => page_url,
                      :thumbnail_image_url => thumbnail_image_url,
                      :star_count => star_count,
                      :rating_count => rating_count,
                      :duration => duration,
                      :description => description)

    # the block value becomes this row's entry in the result list
    TopVideo.new(:movement => movement,
                 :rank_today => rank_today,
                 :rank_yesterday => rank_yesterday,
                 :video => video)
  end

  TopVideosResponse.new(:request_url => url,
                        :videos => entries)
end

#video_details(details_request) ⇒ Object

Looks up detailed information on a specific Video on Google Video with the parameters specified in the supplied VideoDetailsRequest and returns a VideoDetailsResponse.



532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
# File 'lib/google-video.rb', line 532

# Looks up detailed information on a specific video on Google Video using
# the parameters in the supplied VideoDetailsRequest and returns a
# VideoDetailsResponse wrapping the populated Video.
#
# Several page elements are treated as optional: without a description the
# text is '', without a community rating or rater count the star and rating
# counts are 0, without a download button (or a recognisable
# onDownloadClick call on it) the video file url is '', and without a
# playlist table the playlist is empty.
#
# Raises ArgumentError if details_request is not a VideoDetailsRequest.
def video_details(details_request)
  # validate parameters
  unless details_request.is_a?(VideoDetailsRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::VideoDetailsRequest"
  end

  # gather response data from the server
  url = _video_details_url(details_request)
  response = _request(url)
  doc = Hpricot(response)

  # parse title
  title = (doc / "div[@id='pvprogtitle']").inner_html.strip

  # parse description; the text of the 'wholedescr' span is appended to
  # the main description text when that span is present
  font_description = doc % "div[@id='description']/font"
  description = font_description ? font_description.all_text.strip : ''
  span_wholedescr = doc % "span[@id='wholedescr']"
  description += ' ' + span_wholedescr.all_text.strip if span_wholedescr
  description = _decode_html(description)

  # parse star count; like the rating count below, a page without the
  # rating element yields zero rather than failing on a nil element
  span_rating = doc % "span[@id='communityRating']"
  star_count = span_rating ? _parse_star_elements(span_rating / "img[@class='star']") : 0

  # parse rating count
  span_raters = doc % "span[@id='numOfRaters']"
  rating_count = span_raters ? span_raters.inner_html.to_i : 0

  # parse upload user, duration, upload date, upload user domain, upload
  # user url.  unfortunately this is a bit messy since, unlike much of the
  # rest of google's lovely html, there are no useful id or class names we
  # can hang our hat on.  rather, there are anywhere from one to three
  # rows of text, with only the middle row (in the three-row scenario),
  # containing duration and upload date, omnipresent.
  duration_etc_html = (doc % "div[@id='durationetc']").inner_html
  duration_parts = duration_etc_html.split(/<br[^>]+>/)
  # see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
  if duration_parts[0] =~ /\-  [A-Za-z]{3} \d+, \d{4}/
    # first line is duration / upload_date, and there is no upload username
    upload_user = ''
    duration_upload_html = duration_parts[0]
    upload_user_domain = duration_parts[1]
  else
    upload_user = _clean_string(duration_parts[0])
    duration_upload_html = duration_parts[1]
    upload_user_domain = duration_parts[2]
  end

  # parse the duration and upload date
  duration, upload_date = duration_upload_html.split(/\-/)
  duration = _clean_string(duration)
  upload_date = Time.parse(_clean_string(upload_date))

  # parse the upload user url and domain if present
  if upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/
    upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
    upload_user_domain = _clean_string($2)
  else
    upload_user_url = ''
    upload_user_domain = ''
  end

  # pull out view count and rank info table row elements
  tr_statsall = doc / "div[@id='statsall']/table/tr"

  # remove the first row which just contains header info
  tr_statsall.shift

  # parse each of the view count and rank rows
  view_count, rank = _parse_statsall_row(tr_statsall.shift)
  view_count_yesterday, rank_yesterday = _parse_statsall_row(tr_statsall.shift)
  view_count_email, rank_email = _parse_statsall_row(tr_statsall.shift)
  view_count_embed, rank_embed = _parse_statsall_row(tr_statsall.shift)

  # pull out the url to the video .gvp file if present.  the url is the
  # second argument of the onDownloadClick(...) call, so only the captured
  # argument list is split -- splitting the whole script would drag the
  # closing parenthesis (or unrelated script text) into the url.
  video_file_url = ''
  img_download = doc % "img[@src='/static/btn_download.gif']"
  if img_download
    onclick_script = _decode_html(img_download['onclick'])
    if onclick_script =~ /onDownloadClick\(([^\)]+)\)/
      file_argument = $1.split(",")[1]
      video_file_url = file_argument.gsub(/"/, '') if file_argument
    end
  end

  # pull out the video frame thumbnails
  video_frame_thumbnails = (doc / "img[@class='detailsimage']").map do |frame_image|
    _parse_video_frame_thumbnail(frame_image)
  end

  # pull out the playlist entries; a page without the playlist table simply
  # has no entries
  table_upnext = doc % "table[@id='upnexttable']"
  playlist_entries = []
  if table_upnext
    playlist_entries = (table_upnext / "tr").map do |tr_playlist|
      _parse_playlist_entry(tr_playlist)
    end
  end

  # create the new, fully populated video record
  video = Video.new(:description => description,
                    :duration => duration,
                    :page_url => url,
                    :playlist_entries => playlist_entries,
                    :rank => rank,
                    :rank_yesterday => rank_yesterday,
                    :rank_email => rank_email,
                    :rank_embed => rank_embed,
                    :rating_count => rating_count,
                    :star_count => star_count,
                    :title => title,
                    :upload_date => upload_date,
                    :upload_user => upload_user,
                    :upload_user_domain => upload_user_domain,
                    :upload_user_url => upload_user_url,
                    :video_file_url => video_file_url,
                    :video_frame_thumbnails => video_frame_thumbnails,
                    :view_count => view_count,
                    :view_count_yesterday => view_count_yesterday,
                    :view_count_email => view_count_email,
                    :view_count_embed => view_count_embed)

  # build and return the response
  VideoDetailsResponse.new(:request_url => url, :video => video)
end

#video_search(search_request) ⇒ Object

Runs a search query on Google Video with the parameters specified in the supplied VideoSearchRequest and returns a VideoSearchResponse.



454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
# File 'lib/google-video.rb', line 454

# Runs a search query on Google Video with the parameters specified in the
# supplied VideoSearchRequest and returns a VideoSearchResponse carrying
# the request url, the result index range, the total result count, the
# query execution time and one Video per search result row.
#
# Raises ArgumentError if search_request is not a VideoSearchRequest, and
# GoogleVideoException if the result statistics cannot be located or parsed.
def video_search(search_request)
  # validate parameters
  unless search_request.is_a?(VideoSearchRequest)
    raise ArgumentError, "invalid argument, request must be a GoogleVideo::VideoSearchRequest"
  end

  # gather response data from the server
  url = _search_url(search_request)
  response = _request(url)
  doc = Hpricot(response)

  # parse the overall search query stats; a page without the stats cell is
  # reported the same way as stats text that does not match the pattern
  regexp_stats = /([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/
  stats_cell = doc % "#resultsheadertable/tr/td/font"
  stats = stats_cell && regexp_stats.match(stats_cell.inner_html)
  raise GoogleVideoException, "failed to parse search query stats" unless stats

  # the pattern accepts comma-grouped counts such as "12,345"; String#to_i
  # stops at the first comma, so the separators are removed before converting
  start_index, end_index, total_result_count =
    stats.captures.first(3).map { |count| count.delete(',').to_i }
  execution_time = stats[4].to_f

  # parse the video results, one per row of the search result table
  videos = (doc / "table[@class='searchresult']/tr").map do |result_row|
    # parse the thumbnail image
    thumbnail_image_url = _decode_html((result_row % "img[@class='searchresultimg']")['src'])

    # parse the title and page url
    a_title = result_row % "div[@class='resulttitle']/a"
    page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
    title = _decode_html(a_title.inner_html.strip)

    # parse the description text
    description = _decode_html((result_row % "div[@class='snippet']").inner_html.strip)

    # parse the upload username: the channel text up to the first hyphen
    # (nil when there is no channel text, exactly as handed to
    # _clean_string before)
    span_channel = result_row % "span[@class='channel']"
    channel_html = span_channel ? span_channel.inner_html : ''
    upload_user = _clean_string(channel_html[/[^\-]+/])

    # stars
    star_count = _parse_star_elements(result_row / "img[@class='star']")

    # rating count
    span_raters = result_row % "span[@id='numOfRaters']"
    rating_count = span_raters ? span_raters.inner_html.to_i : 0

    # duration and upload date share one "<duration> - <date>" span
    span_date = result_row % "span[@class='date']"
    span_date.inner_html =~ /([^\-]+) \- (.*)$/
    duration = _clean_string($1)
    upload_date = Time.parse(_clean_string($2))

    # the block value becomes this row's entry in the video result list
    Video.new(:title => title,
              :page_url => page_url,
              :thumbnail_image_url => thumbnail_image_url,
              :description => description,
              :star_count => star_count,
              :rating_count => rating_count,
              :duration => duration,
              :upload_date => upload_date,
              :upload_user => upload_user)
  end

  # construct the final search response with all info we've gathered
  VideoSearchResponse.new(:request_url => url,
                          :start_index => start_index,
                          :end_index => end_index,
                          :total_result_count => total_result_count,
                          :execution_time => execution_time,
                          :videos => videos)
end