Package elisa :: Package plugins :: Package ugly :: Package stage6 :: Module stage_media
[hide private]
[frames] | [no frames]

Source Code for Module elisa.plugins.ugly.stage6.stage_media

  1  # -*- coding: utf-8 -*- 
  2  # Elisa - Home multimedia server 
  3  # Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com). 
  4  # All rights reserved. 
  5  # 
  6  # This file is available under one of two license agreements. 
  7  # 
  8  # This file is licensed under the GPL version 3. 
  9  # See "LICENSE.GPL" in the root of this distribution including a special 
 10  # exception to use Elisa with Fluendo's plugins. 
 11  # 
 12  # The GPL part of Elisa is also available under a commercial licensing 
 13  # agreement from Fluendo. 
 14  # See "LICENSE.Elisa" in the root directory of this distribution package 
 15  # for details on that license. 
 16   
 17  """ 
 18  StageMedia component class 
 19  """ 
 20   
 21  __maintainer__ = 'Florian Boucault <florian@fluendo.com>' 
 22  __maintainer2__ = 'Benjamin Kampmann <benjamin@fluendo.com>' 
 23   
 24  from elisa.base_components.media_provider import MediaProvider, UriNotMonitorable 
 25  from elisa.core.media_uri import MediaUri, unquote 
 26  from elisa.core.utils import deferred_action 
 27  from elisa.core import common 
 28  from elisa.core.bus import bus_message 
 29   
 30  from elisa.extern.configobj import Section 
 31   
 32  from elisa.core.observers.dict import DictObservable 
 33   
 34  from twisted.internet import defer, threads 
 35  from twisted.internet import reactor 
 36  from twisted.web import client 
 37  #from twisted.web.client import HTTPDownloader 
 38   
 39  from BeautifulSoup import BeautifulSoup 
 40   
 41  import urllib2, urllib, cookielib 
 42  import re 
 43  from mutex import mutex 
 44   
 45  from elisa.extern.translation import gettexter, N_ 
 46  T_ = gettexter('elisa-stage6') 
 47   
 48   
class StageParser:
    """
    Parser that extracts video titles, thumbnails, tag names and
    pagination information from a Stage6 HTML page.
    """

    # Pre-compiled regexps used to pull the data we are looking for out
    # of the HTML fragments located with BeautifulSoup.
    reg_href = re.compile("href=\"(.*)\"")
    reg_href_avatar = re.compile("href=\"(.*)\"><acronym")
    reg_img = re.compile("alt=\"(.*)\" src=\"(.*)\"")
    reg_time = re.compile("<img (.*)/></acronym>(.*)")
    reg_img_avatar = re.compile("src=\"(.*)\" alt=")
    reg_img_title = re.compile("<acronym title=\"(.*)\" class=\"no-border\">")
    reg_title = re.compile("title=\"(.*)\">(.*)</a></")
    reg_video_id = re.compile("video/(.*)/")
    reg_pages = re.compile(">(.*)</a>")
    reg_watch_type = re.compile("<div class=\"user-watch\" id=\"(.*)\">")

    def __init__(self, string_to_parse):
        """
        @param string_to_parse: the HTML code to parse
        @type string_to_parse: string
        """
        self._to_parse = string_to_parse

    def get_tags(self):
        """
        Returns a list of tags as strings. This is parsing the
        HTML data we have in self._to_parse

        @rtype: list of strings
        """
        tags = []
        # In case the Stage6 website is having too many connections
        could_not_connect = '<!-- Could not connect to MySQL host: Too many connections -->'
        if self._to_parse.startswith(could_not_connect):
            tags.append("Could not connect")
            return tags

        # BeautifulSoup is going to help us find the code we're looking for
        b = BeautifulSoup(self._to_parse)
        res = b.findAll('ul', attrs={'class': 'tags-drill'})

        if len(res):
            # Tag names are between <li> marks
            res = res[0].findAllNext('li')
            for tag in res:
                t = tag.contents[0]
                for i in t.attrs:
                    if i[0] == 'class':
                        title = t.attrs[0][1]
                        tags.append(title)
                        break

        return tags

    def get_pages(self):
        """
        Returns an integer representing the last page. If there are no
        pages we return a zero.
        """
        b = BeautifulSoup(self._to_parse)

        res = b.findAll('a', attrs={'class': 'pagination-number'})
        if len(res) == 0:
            return 0

        # The last 'pagination-number' link carries the highest page
        # number. The regexp captures a string; convert it so callers
        # can do meaningful integer comparisons (this method always
        # documented an integer return).
        last_page = self.reg_pages.search(str(res[len(res) - 1])).groups()[0]

        return int(last_page)

    def get_watchlist(self):
        """
        Returns a list of dictionaries, which look like this:
        {'label' : '', 'href' : '', 'img': ''}
        """
        b = BeautifulSoup(self._to_parse)

        res = b.findAll('div', attrs={'class': 'user-watch'})

        # Renamed from 'list'/'type' to avoid shadowing the builtins.
        entries = []

        for div in res:
            kind = self.reg_watch_type.search(str(div)).groups()[0]
            avatar = div.find('div', attrs={'class': 'avatar'})
            link = self.reg_href_avatar.search(str(avatar)).groups()[0]
            avatar_img = self.reg_img_avatar.search(str(avatar)).groups()[0]
            title = self.reg_img_title.search(str(avatar)).groups()[0]

            # User entries point at their uploaded videos, everything
            # else at its generic video listing.
            if kind.startswith('user'):
                link = "%s/videos/group:uservideos" % link
            else:
                link = "%s/videos/" % link

            entries.append({'label': unquote(title), 'href': link,
                            'img': avatar_img})

        return entries

    def get_videos(self):
        """
        Returns a list of videos, with their name, URL, thumbnail and
        duration. This is parsing the HTML data we have in
        self._to_parse.

        @rtype: list of (string, string, string, string or None)
        """
        videos = []

        # Video info are between <div> marks with their class='video'
        b = BeautifulSoup(self._to_parse)
        resu = b.findAll('div', attrs={'class': 'video'})

        for res in resu:
            vid_tit = res.find('div', attrs={'class': 'video-title'})
            href = res.findAll('img')
            href = href[len(href) - 1]
            if not vid_tit:
                continue

            line = str(vid_tit)
            # The overlay div carries the duration of the video.
            overlay = str(res.find('div', attrs={'class': 'video-overlay'}))
            times = self.reg_time.search(overlay)
            if times:
                time = times.groups()[1]
            else:
                time = None

            # get the href to retrieve video id
            match = self.reg_href.search(line)
            if not match:
                continue

            # retrieve the thumbnail location
            imgp = self.reg_img.search(str(href))
            if imgp:
                img = imgp.groups()[1]
            else:
                img = ''

            video_id = self.reg_video_id.search(match.groups()[0])

            if video_id:
                title = self.reg_title.search(line)
                if title:
                    # Finally we add the video id, its title, thumbnail
                    # location and duration
                    videos.append((video_id.groups()[0],
                                   title.groups()[1],
                                   img, time))

        return videos


# Disabled twisted-based downloader prototype, kept for reference:
# class Downloader(HTTPDownloader):
#
#     def pageEnd(self):
#
#         print "got file"
class StageMedia(MediaProvider):
    """
    This class implements stage6 video website browsing support
    http://stage6.divx.com
    """

    # URL where we are going to look for video tags
    MAIN_URL = "http://stage6.divx.com/"
    TAGS_URL = MAIN_URL + "videos/"
    # URL where the videos are actually located
    VIDEOS_URL = "stage6://video.stage6.com/"

    # Human-readable description of each supported configuration key.
    config_doc = {'email' : 'The email which is registered at stage6, optional',
                  'password' : 'the password',
                  'pages' : "Show the 'other pages'-item, experimental. See"
                            " CONFIGURATION for more informations",
                  'certain_uris': "This is a list of other Stage6-Uris, see"
                                  " CONFIGURATION in the plugins-directory for"
                                  " more informations.",
                  'max_pages' : "The maximum number of other pages to show"
                  }

    # Values used when the user's configuration does not set a key.
    default_config = {'email' : '',
                      'password' : '',
                      'pages' : 0,
                      'max_pages' : 99,
                      'certain_uris': []
                      }
    def __init__(self):
        """
        We init here also the base class. Caching of directory listing
        will be done in the self._cache dict, protected from concurrent
        access by a mutex.
        """
        MediaProvider.__init__(self)

        # We create a cache of retrieved results
        self._cache = {}
        self._mutex = mutex()
        self._loggedIn = False
        self._more_pages = 0
        self._max_pages = 99
        self._certain_uris = {}

        # Here we create a DeferredActionManager, which permits us
        # to manage a queue of deferred actions. This is useful
        # for providers which uses a data protocol which can take a
        # lot of resources, in order to have only one request at a time
        self._def_action = deferred_action.DeferredActionsManager()

        self._cookie = cookielib.LWPCookieJar()
        # Install a urllib2 opener carrying the cookie jar, so the
        # Stage6 login session cookie is sent on subsequent requests.
        # NOTE(review): install_opener mutates process-wide urllib2
        # state - confirm no other component installs its own opener.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cookie))
        urllib2.install_opener(opener)
    def initialize(self):
        """
        Announce the Stage6 location on the application bus, read the
        plugin configuration and log into the website in a thread.
        """
        # Advertise stage6:/// as a new internet video location.
        uri = "stage6:///"
        action_type = bus_message.MediaLocation.ActionType.LOCATION_ADDED
        msg = bus_message.InternetLocation(action_type, "Stage6", 'stage6', uri,
                                           media_types=['video',],
                                           theme_icon='stage6')
        common.application.bus.send_message(msg)
        self._more_pages = self.config.get('pages', 0)
        self._max_pages = self.config.get('max_pages', 99)
        uris = self.config.get('certain_uris', [])

        if isinstance(uris, list):
            # Remember each configured uri; the label (dict value) stays
            # None until per-uri labels are supported (see TODO below).
            for uri in uris:
                self._certain_uris[uri] = None
            if isinstance(self.config, Section):
                for section in self.config.sections:
                    if section in self._certain_uris.keys():
                        pass
                        ## TODO: make this work!
                        # print type(section), section, dir(section)
                        # label = section.get('label', None)
                        # self._certain_uris[section] = label
        else:
            self.warning("Could not read configuration options certain_uris."
                         "Maybe it is not a list?")

        # The logIn should not block ;)
        return threads.deferToThread(self._logIn)
    def clean(self):
        # Persisting the configured uris back is not implemented yet:
        # self.config.set('certain_uris', self._certain_uris)
        MediaProvider.clean(self)
315 - def _logIn(self):
316 317 email = self.config.get('email', '') 318 pw = self.config.get('password', '') 319 320 if email != -1 and pw != '': 321 322 self.debug("Loggin in with %s:%s" % (email, pw)) 323 data = urllib.urlencode({'email' : email, 324 'password' : pw, 325 'account' : 'true'}) 326 req = urllib2.Request('http://stage6.divx.com/users/login/login-post', 327 data) 328 329 try: 330 handler = urllib2.urlopen(req) 331 except urllib2.URLError, u: 332 self.warning("Could not log into stage6 server: %s" % u) 333 return 334 else: 335 res = handler.read() 336 if res.find('Log out') != -1: 337 self.debug("Logged In") 338 self._loggedIn = True 339 return 340 341 self.warning("Could not log in. Maybe you should check your login" 342 " infromations")
343 344
346 # We do not need media scanning. We can provide the metadata ourselves 347 return {}
348
350 return { 'stage6': 0 }
351
    def get_media_type(self, uri):
        # Queue the real work on the DeferredActionsManager so that
        # only one request runs at a time.
        return self._def_action.insert_action(0, self._get_media_type, uri)
355 - def _get_media_type(self, uri):
356 # If the uri starts with the stage6 video domain 357 # name, we know it is a video. Otherwise it is 358 # considered as a directory 359 if repr(uri).startswith(self.VIDEOS_URL): 360 return { 'file_type' : 'video', 361 'mime_type' : '' } 362 else: 363 return { 'file_type' : 'directory', 364 'mime_type' : '' }
365
    def is_directory(self, uri):
        # Queue the synchronous check on the DeferredActionsManager.
        return self._def_action.insert_action(0, self._is_directory, uri)
369 - def _is_directory(self, uri):
370 # if the uri doesn't start with the stage6 video domain, 371 # we know it is a directory 372 return not repr(uri).startswith(self.VIDEOS_URL)
373
    def has_children(self, uri):
        # Queue the synchronous check on the DeferredActionsManager.
        return self._def_action.insert_action(0, self._has_children, uri)
377 - def _has_children(self, uri):
378 # We can consider that a video tag we have found on the stage6 379 # always have videos linked to. 380 return self.is_directory(uri)
381
    def has_children_with_types(self, uri, media_types):
        # Queue the synchronous check on the DeferredActionsManager.
        return self._def_action.insert_action(0, self._has_children_with_types, uri, media_types)
385 - def _has_children_with_types(self, uri, media_types):
386 if 'video' in media_types: 387 return self._is_directory(uri) 388 else: 389 return False
390
    def _get_cached_uri(self, uri, children, add_info):
        """
        Return the list of children from a parent URI,
        or None if this URI has not yet been cached
        """
        # NOTE(review): mutex.testandset() does NOT block when the lock
        # is already held - it simply returns False - so this does not
        # actually guarantee mutual exclusion; confirm whether a real
        # blocking lock (threading.Lock) is needed here.
        self._mutex.testandset()

        ret = None
        # If we have the uri cached, return it
        if self._cache.has_key(repr(uri)):
            # Cache entries are (child_uri, info) tuples.
            for i in self._cache[repr(uri)]:
                if add_info:
                    children.append(i)
                else:
                    children.append(i[0])
            ret = children

        self._mutex.unlock()

        return ret
    def _add_to_cache(self, parent, child, info):
        """
        Attach a child to a parent in the cache
        """
        # NOTE(review): testandset() is non-blocking (see
        # _get_cached_uri) - this may not protect the cache against
        # concurrent access as intended.
        self._mutex.testandset()

        # Cache keys are the repr() string of the parent uri.
        parent = repr(parent)
        if not self._cache.has_key(parent):
            self._cache[parent] = [(child, info),]
        else:
            self._cache[parent].append((child, info))

        self._mutex.unlock()
429 - def get_real_uri(self, uri):
430 # At this point we need to convert our internal stage6 431 # uri to the real http uri that can be used to play a video 432 # Fortunately, we just have to change the scheme. 433 http = MediaUri(uri) 434 http.scheme = 'http' 435 self.info("The URI is %s" % http) 436 return http
437 438
439 - def _read_url(self, url):
440 """ 441 Read an URI and return its content 442 """ 443 444 """ 445 dl = Downloader(url, "stage6.html") 446 scheme, host, port, path = client._parse(url) 447 reactor.connectTCP(host, port, dl) 448 449 def download_complete(result): 450 print "Download Complete." 451 reactor.stop() 452 453 def download_error(failure): 454 print "Error:", failure.getErrorMessage() 455 reactor.stop() 456 457 dl.deferred.addCallback(download_complete).addErrback(download_error) 458 """ 459 460 try: 461 f = urllib2.urlopen(url) 462 except urllib2.URLError, u: 463 self.warning("Could not connect to stage6-server: %s" % u) 464 return None 465 return f.read()
466 467
468 - def _retrieve_children(self, uri, list_of_children, add_info=False):
469 """ 470 retrieve the children of uri and fills list 471 472 @param uri: the URI to analyze 473 @type uri: L{elisa.core.media_uri.MediaUri} 474 @param list_of_children: List where the children will be appended 475 @type list_of_children: list 476 @param add_info: Add also the thumbnails to the list 477 @type add_info: bool 478 """ 479 480 # If the uri requested is in the cache, we return the cached children 481 cache = self._get_cached_uri(uri, list_of_children, add_info) 482 if cache: 483 self.debug('Loaded from cache: %s' % repr(uri)) 484 return cache 485 486 # if the uri path is /, we have to retrieve the tags from the main stage6 page 487 if uri.path == '/': 488 ### Make a main menu: 489 490 uri = MediaUri('stage6:///videos/') 491 uri.label = T_(N_("Featured Videos")) 492 list_of_children.append((uri, {})) 493 494 uri = MediaUri("stage6:///videos/order:hotness") 495 uri.label = T_(N_("Hottest Videos")) 496 list_of_children.append((uri, {})) 497 498 uri = MediaUri("stage6:///videos/order:date") 499 uri.label = T_(N_("Latest Videos")) 500 list_of_children.append((uri, {})) 501 502 uri = MediaUri("stage6:///videos/order:length") 503 uri.label = T_(N_("Longest Videos")) 504 list_of_children.append((uri, {})) 505 506 uri = MediaUri("stage6:///tags") 507 uri.label = T_(N_("Tags")) 508 list_of_children.append((uri, {})) 509 510 if self._loggedIn: 511 uri = MediaUri("stage6:///me") 512 uri.label = T_(N_("my WatchList")) 513 list_of_children.append((uri, {})) 514 515 if len(self._certain_uris): 516 uri = MediaUri('stage6:///certains') 517 uri.label = T_(N_("Certain Uris")) 518 list_of_children.append((uri, {})) 519 520 return list_of_children 521 522 elif uri.path == '/tags': 523 ### Make a main menu: 524 uri = MediaUri('stage6:///tags/order:NoOrder') 525 uri.label = T_(N_("Featured tags")) 526 list_of_children.append((uri, {})) 527 uri = MediaUri("stage6:///tags/order:hotness") 528 uri.label = T_(N_("Hottest tags")) 529 list_of_children.append((uri, {})) 530 uri = 
MediaUri("stage6:///tags/order:date") 531 uri.label = T_(N_("Latest tags")) 532 list_of_children.append((uri, {})) 533 uri = MediaUri("stage6:///tags/order:length") 534 ## Does that make any sense? 535 uri.label = T_(N_("Largest tags")) 536 list_of_children.append((uri, {})) 537 return list_of_children 538 539 elif uri.path == '/certains': 540 for uri,label in self._certain_uris.items(): 541 m = MediaUri(uri) 542 if label != None: 543 m.label = label 544 list_of_children.append((m, {})) 545 546 # we have a list of tags 547 elif uri.path.startswith('/tags'): 548 # We retrieve the HTML page 549 path = uri.path[5:] 550 to_parse = self._read_url(self.TAGS_URL + uri.path) 551 if not to_parse: 552 ## An Error happend: 553 return list_of_children 554 # create the parser and retrieve the tags 555 parser = StageParser(to_parse) 556 tags = parser.get_tags() 557 558 # We add to the children list a MediaUri representing each tag 559 for tag in tags: 560 t = MediaUri("stage6:///videos/tag:%s%s" % (tag,path)) 561 t.label = tag 562 563 if add_info: 564 list_of_children.append((t, {})) 565 else: 566 list_of_children.append(t) 567 # Cache the uri 568 self._add_to_cache(uri, t, {}) 569 570 elif uri.path == "/me": 571 url = "http://stage6.divx.com/user/any/watchlist/" 572 to_parse = self._read_url(url) 573 if not to_parse: 574 ### An Error happend: 575 return list_of_children 576 577 parser = StageParser(to_parse) 578 list = parser.get_watchlist() 579 for item in list: 580 uri = MediaUri("stage6://%s" % item['href']) 581 uri.label = item['label'] 582 img = MediaUri(item['img']) 583 list_of_children.append( (uri, {'default_image' : img})) 584 elif uri.path.startswith('/pager'): 585 pages = int(uri.get_param('pages', 0)) 586 # FIXME: how could this be done i18n ? 
587 pages_string = "Page %%%d.d" % len(str(pages)) 588 path = uri.path[7:] 589 page_n = 2 590 while page_n <= pages: 591 page = MediaUri("stage6:///%s?page=%s" % 592 (path, page_n)) 593 page.label = pages_string % page_n 594 list_of_children.append((page, {})) 595 page_n +=1 596 else: 597 path = uri.path[1:] # Remove first slash 598 page = uri.get_param('page', 1) 599 url_path = "%s%s?page=%s" % (self.MAIN_URL, path, page) 600 self.debug("Asking stage6 for: %s" % url_path) 601 # download HTML page and parse it to retrieve the video list 602 to_parse = self._read_url(url_path) 603 if not to_parse: 604 self.warning("Reading didn't work") 605 ### Error: 606 return list_of_children 607 parser = StageParser(to_parse) 608 videos = parser.get_videos() 609 self.debug("Found %s videos on that page" % len(videos)) 610 ## uncomment this line an comment the line beneath it, and the 611 ## paging is enabled 612 pages = 0 613 if self._more_pages: 614 pages = parser.get_pages() 615 616 # We add to the children list a MediaUri representing each video 617 for v in videos: 618 t = MediaUri(self.VIDEOS_URL + v[0] + "/.avi") 619 label = v[1].decode("utf-8") 620 # set the uri label to the name of the video 621 t.label = unquote(label) 622 if add_info: 623 # Add the thumbnail url to the info dict 624 uri = v[2] 625 d = DictObservable() 626 if uri.startswith('http'): 627 d['default_image'] = MediaUri(uri) 628 if v[3] != None: 629 d['length'] = v[3] 630 list_of_children.append((t, d)) 631 # Cache the uri 632 self._add_to_cache(uri, t, d) 633 else: 634 list_of_children.append(t) 635 # Cache the uri 636 self._add_to_cache(uri, t, {}) 637 638 if pages > 1 and page == 1: 639 if pages > self._max_pages: 640 pages = self._max_pages 641 642 pager = MediaUri("stage6:///pager/%s?pages=%s" 643 %(path, pages)) 644 pager.label = T_(N_("Other Pages")) 645 list_of_children.append((pager, {})) 646 647 return list_of_children
648
    def get_direct_children(self, uri, children_with_info):
        # Retrieve the children of uri, filling an information dict
        # (thumbnail, duration) for each child; queued on the
        # DeferredActionsManager.
        return self._def_action.insert_action(0, self._retrieve_children, uri, children_with_info, add_info=True)
    def next_location(self, uri, root=None):
        # Queue the synchronous lookup on the DeferredActionsManager.
        return self._def_action.insert_action(0, self._next_location, uri, root=root)
657 - def _next_location(self, uri, root=None):
658 if not root: 659 root_str = MediaUri(u'stage6:///') 660 else: 661 root_str = repr(root) 662 663 to_find = repr(uri) 664 # is it cached ? 665 if self._cache.has_key(root_str): 666 for child, children in self._cache.iteritems(): 667 # look if it is a child of root 668 if child.startswith(root_str): 669 i = 0 670 while i < len(self._cache[child]): 671 # Is that our uri ? 672 if to_find == self._cache[child][i]: 673 # Check if there is a uri following 674 i += 1 675 if i < len(self._cache[child]): 676 # if yes, returns it 677 return MediaUri(self._cache[child][i]) 678 break 679 i += 1 680 681 return None
682
    def previous_location(self, uri):
        # Backwards navigation is not supported.
        return None
    def uri_is_monitorable(self, uri):
        # We cannot monitor a remote stage6 uri for changes
        return False
    def uri_is_monitored(self, uri):
        # Nothing is ever monitored (see uri_is_monitorable)
        return False
694 - def open(self, uri, mode=None, block=True):
695 # We cannot open 'tags' 696 if self.is_directory(uri): 697 return None 698 699 # What we do here is convert the uri in its http form, 700 # and ask the media_manager to provide a suitable component 701 # - such as GnomeVFSProvider - to do the work for us 702 uri = self.get_real_uri(uri) 703 media_manager = common.application.media_manager 704 if media_manager.enabled: 705 media = media_manager.open(uri, mode, block) 706 else: 707 media = None 708 return media
709