Home | Trees | Indices | Help |
---|
|
# -*- coding: utf-8 -*-
# Elisa - Home multimedia server
# Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Elisa with Fluendo's plugins.
#
# The GPL part of Elisa is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Elisa" in the root directory of this distribution package
# for details on that license.

"""
StageMedia component class
"""

__maintainer__ = 'Florian Boucault <florian@fluendo.com>'
__maintainer2__ = 'Benjamin Kampmann <benjamin@fluendo.com>'

from elisa.base_components.media_provider import MediaProvider, UriNotMonitorable
from elisa.core.media_uri import MediaUri, unquote
from elisa.core.utils import deferred_action
from elisa.core import common
from elisa.core.bus import bus_message

from elisa.extern.configobj import Section

from elisa.core.observers.dict import DictObservable

from twisted.internet import defer, threads
from twisted.internet import reactor
from twisted.web import client

from BeautifulSoup import BeautifulSoup

import urllib2, urllib, cookielib
import re
from mutex import mutex

from elisa.extern.translation import gettexter, N_
T_ = gettexter('elisa-stage6')


class StageParser:
    """
    Parser that extracts video information from a Stage6
    (stage6.divx.com) HTML page: video ids and titles, thumbnail URLs,
    pagination, and the entries of a user's watchlist.
    """

    # Pre-compiled regexps that pick the interesting attributes out of
    # the HTML fragments isolated by BeautifulSoup below.
    reg_href = re.compile("href=\"(.*)\"")
    reg_href_avatar = re.compile("href=\"(.*)\"><acronym")
    reg_img = re.compile("alt=\"(.*)\" src=\"(.*)\"")
    reg_time = re.compile("<img (.*)/></acronym>(.*)")
    reg_img_avatar = re.compile("src=\"(.*)\" alt=")
    reg_img_title = re.compile("<acronym title=\"(.*)\" class=\"no-border\">")
    reg_title = re.compile("title=\"(.*)\">(.*)</a></")
    reg_video_id = re.compile("video/(.*)/")
    reg_pages = re.compile(">(.*)</a>")
    reg_watch_type = re.compile("<div class=\"user-watch\" id=\"(.*)\">")

    def __init__(self, string_to_parse):
        """
        @param string_to_parse: the HTML code to parse
        @type string_to_parse: string
        """
        self._to_parse = string_to_parse

    # NOTE(review): the full source also defines a get_tags() method here
    # (original lines 78-108, referenced by the media provider); it is
    # missing from this extraction and has deliberately not been
    # reconstructed.

    def get_pages(self):
        """
        Return the number of the last page of the listing as an integer,
        or 0 when the page is not paginated.
        """
        soup = BeautifulSoup(self._to_parse)

        pagination = soup.findAll('a', attrs={'class': 'pagination-number'})
        if not pagination:
            return 0

        # The last pagination link carries the highest page number.
        last_page = self.reg_pages.search(str(pagination[-1])).groups()[0]

        # BUGFIX: the regexp group is a string, but callers compare the
        # result against integers -- convert before returning, as the
        # docstring always promised.
        return int(last_page)

    def get_watchlist(self):
        """
        Return a list of dictionaries, one per watchlist entry, which
        look like this: {'label': '', 'href': '', 'img': ''}
        """
        soup = BeautifulSoup(self._to_parse)

        entries = []
        for div in soup.findAll('div', attrs={'class': 'user-watch'}):
            # renamed from 'type'/'list' to avoid shadowing builtins
            watch_type = self.reg_watch_type.search(str(div)).groups()[0]
            avatar = div.find('div', attrs={'class': 'avatar'})
            link = self.reg_href_avatar.search(str(avatar)).groups()[0]
            avatar_img = self.reg_img_avatar.search(str(avatar)).groups()[0]
            title = self.reg_img_title.search(str(avatar)).groups()[0]

            if watch_type.startswith('user'):
                link = "%s/videos/group:uservideos" % link
            else:
                link = "%s/videos/" % link

            entries.append({'label': unquote(title),
                            'href': link,
                            'img': avatar_img})

        return entries

    def get_videos(self):
        """
        Return the videos found in self._to_parse with their id, title,
        thumbnail location and duration.

        @rtype: list of (string, string, string, string or None)
        """
        videos = []

        # Video info sits in <div> elements with class='video'; the title
        # is in a nested <div class='video-title'>.
        soup = BeautifulSoup(self._to_parse)

        for res in soup.findAll('div', attrs={'class': 'video'}):
            vid_tit = res.find('div', attrs={'class': 'video-title'})
            if not vid_tit:
                # guard moved up: no point parsing entries with no title
                continue

            # the last <img> of the entry is the thumbnail
            thumb_tags = res.findAll('img')
            thumb_tag = thumb_tags[-1]

            line = str(vid_tit)
            overlay = str(res.find('div', attrs={'class': 'video-overlay'}))
            times = self.reg_time.search(overlay)
            if times:
                duration = times.groups()[1]
            else:
                duration = None

            # get the href to retrieve the video id
            match = self.reg_href.search(line)
            if not match:
                continue

            # retrieve the thumbnail location
            imgp = self.reg_img.search(str(thumb_tag))
            if imgp:
                img = imgp.groups()[1]
            else:
                img = ''

            video_id = self.reg_video_id.search(match.groups()[0])
            if video_id:
                title = self.reg_title.search(line)
                if title:
                    # Finally add the video id, title, thumbnail and time
                    videos.append((video_id.groups()[0],
                                   title.groups()[1],
                                   img, duration))

        return videos
See" 234 " CONFIGURATION for more informations", 235 'certain_uris': "This is a list of other Stage6-Uris, see" 236 " CONFIGURATION in the plugins-directory for" 237 " more informations.", 238 'max_pages' : "The maximum number of other pages to show" 239 } 240 241 default_config = {'email' : '', 242 'password' : '', 243 'pages' : 0, 244 'max_pages' : 99, 245 'certain_uris': [] 246 } 247 248709250 """ 251 We init here also the base class. Caching of directory listng 252 will done in the self._cache dict, protected from concurrent 253 access by a mutex 254 """ 255 256 MediaProvider.__init__(self) 257 258 # We create a cache of retrieved results 259 self._cache = {} 260 self._mutex = mutex() 261 self._loggedIn = False 262 self._more_pages = 0 263 self._max_pages = 99 264 self._certain_uris = {} 265 266 # Here we create a DeferredActionManager, which permits us 267 # to manage a queue of deferred actions. This is useful 268 # for providers which uses a data protocol which can take a 269 # lot of resources, in order to have only one request at a time 270 self._def_action = deferred_action.DeferredActionsManager() 271 272 self._cookie = cookielib.LWPCookieJar() 273 # SetUp CookieStuff 274 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cookie)) 275 urllib2.install_opener(opener)276 277279 uri = "stage6:///" 280 action_type = bus_message.MediaLocation.ActionType.LOCATION_ADDED 281 msg = bus_message.InternetLocation(action_type, "Stage6", 'stage6', uri, 282 media_types=['video',], 283 theme_icon='stage6') 284 common.application.bus.send_message(msg) 285 self._more_pages = self.config.get('pages', 0) 286 self._max_pages = self.config.get('max_pages', 99) 287 uris = self.config.get('certain_uris', []) 288 289 if isinstance(uris, list): 290 291 for uri in uris: 292 self._certain_uris[uri] = None 293 if isinstance(self.config, Section): 294 for section in self.config.sections: 295 if section in self._certain_uris.keys(): 296 pass 297 ## TODO: make this work! 
298 # print type(section), section, dir(section) 299 # label = section.get('label', None) 300 # self._certain_uris[section] = label 301 else: 302 self.warning("Could not read configuration options certain_uris." 303 "Maybe it is not a list?") 304 305 306 # The logIn should not block ;) 307 return threads.deferToThread(self._logIn)308 309 313 314316 317 email = self.config.get('email', '') 318 pw = self.config.get('password', '') 319 320 if email != -1 and pw != '': 321 322 self.debug("Loggin in with %s:%s" % (email, pw)) 323 data = urllib.urlencode({'email' : email, 324 'password' : pw, 325 'account' : 'true'}) 326 req = urllib2.Request('http://stage6.divx.com/users/login/login-post', 327 data) 328 329 try: 330 handler = urllib2.urlopen(req) 331 except urllib2.URLError, u: 332 self.warning("Could not log into stage6 server: %s" % u) 333 return 334 else: 335 res = handler.read() 336 if res.find('Log out') != -1: 337 self.debug("Logged In") 338 self._loggedIn = True 339 return 340 341 self.warning("Could not log in. Maybe you should check your login" 342 " infromations")343 344 348 351 354356 # If the uri starts with the stage6 video domain 357 # name, we know it is a video. Otherwise it is 358 # considered as a directory 359 if repr(uri).startswith(self.VIDEOS_URL): 360 return { 'file_type' : 'video', 361 'mime_type' : '' } 362 else: 363 return { 'file_type' : 'directory', 364 'mime_type' : '' }365 368370 # if the uri doesn't start with the stage6 video domain, 371 # we know it is a directory 372 return not repr(uri).startswith(self.VIDEOS_URL)373 376378 # We can consider that a video tag we have found on the stage6 379 # always have videos linked to. 
380 return self.is_directory(uri)381 384 390392 """ 393 Return the list of children from a parent URI, 394 or None if this URI has not yet been cached 395 """ 396 self._mutex.testandset() 397 398 ret = None 399 # If we have the uri cached, return it 400 if self._cache.has_key(repr(uri)): 401 for i in self._cache[repr(uri)]: 402 if add_info: 403 children.append(i) 404 else: 405 children.append(i[0]) 406 ret = children 407 408 self._mutex.unlock() 409 410 return ret411 412414 """ 415 Attach a child to a parent in the cache 416 """ 417 418 self._mutex.testandset() 419 420 parent = repr(parent) 421 if not self._cache.has_key(parent): 422 self._cache[parent] = [(child, info) ,] 423 else: 424 self._cache[parent].append((child, info)) 425 426 self._mutex.unlock()427 428430 # At this point we need to convert our internal stage6 431 # uri to the real http uri that can be used to play a video 432 # Fortunately, we just have to change the scheme. 433 http = MediaUri(uri) 434 http.scheme = 'http' 435 self.info("The URI is %s" % http) 436 return http437 438440 """ 441 Read an URI and return its content 442 """ 443 444 """ 445 dl = Downloader(url, "stage6.html") 446 scheme, host, port, path = client._parse(url) 447 reactor.connectTCP(host, port, dl) 448 449 def download_complete(result): 450 print "Download Complete." 
451 reactor.stop() 452 453 def download_error(failure): 454 print "Error:", failure.getErrorMessage() 455 reactor.stop() 456 457 dl.deferred.addCallback(download_complete).addErrback(download_error) 458 """ 459 460 try: 461 f = urllib2.urlopen(url) 462 except urllib2.URLError, u: 463 self.warning("Could not connect to stage6-server: %s" % u) 464 return None 465 return f.read()466 467469 """ 470 retrieve the children of uri and fills list 471 472 @param uri: the URI to analyze 473 @type uri: L{elisa.core.media_uri.MediaUri} 474 @param list_of_children: List where the children will be appended 475 @type list_of_children: list 476 @param add_info: Add also the thumbnails to the list 477 @type add_info: bool 478 """ 479 480 # If the uri requested is in the cache, we return the cached children 481 cache = self._get_cached_uri(uri, list_of_children, add_info) 482 if cache: 483 self.debug('Loaded from cache: %s' % repr(uri)) 484 return cache 485 486 # if the uri path is /, we have to retrieve the tags from the main stage6 page 487 if uri.path == '/': 488 ### Make a main menu: 489 490 uri = MediaUri('stage6:///videos/') 491 uri.label = T_(N_("Featured Videos")) 492 list_of_children.append((uri, {})) 493 494 uri = MediaUri("stage6:///videos/order:hotness") 495 uri.label = T_(N_("Hottest Videos")) 496 list_of_children.append((uri, {})) 497 498 uri = MediaUri("stage6:///videos/order:date") 499 uri.label = T_(N_("Latest Videos")) 500 list_of_children.append((uri, {})) 501 502 uri = MediaUri("stage6:///videos/order:length") 503 uri.label = T_(N_("Longest Videos")) 504 list_of_children.append((uri, {})) 505 506 uri = MediaUri("stage6:///tags") 507 uri.label = T_(N_("Tags")) 508 list_of_children.append((uri, {})) 509 510 if self._loggedIn: 511 uri = MediaUri("stage6:///me") 512 uri.label = T_(N_("my WatchList")) 513 list_of_children.append((uri, {})) 514 515 if len(self._certain_uris): 516 uri = MediaUri('stage6:///certains') 517 uri.label = T_(N_("Certain Uris")) 518 
list_of_children.append((uri, {})) 519 520 return list_of_children 521 522 elif uri.path == '/tags': 523 ### Make a main menu: 524 uri = MediaUri('stage6:///tags/order:NoOrder') 525 uri.label = T_(N_("Featured tags")) 526 list_of_children.append((uri, {})) 527 uri = MediaUri("stage6:///tags/order:hotness") 528 uri.label = T_(N_("Hottest tags")) 529 list_of_children.append((uri, {})) 530 uri = MediaUri("stage6:///tags/order:date") 531 uri.label = T_(N_("Latest tags")) 532 list_of_children.append((uri, {})) 533 uri = MediaUri("stage6:///tags/order:length") 534 ## Does that make any sense? 535 uri.label = T_(N_("Largest tags")) 536 list_of_children.append((uri, {})) 537 return list_of_children 538 539 elif uri.path == '/certains': 540 for uri,label in self._certain_uris.items(): 541 m = MediaUri(uri) 542 if label != None: 543 m.label = label 544 list_of_children.append((m, {})) 545 546 # we have a list of tags 547 elif uri.path.startswith('/tags'): 548 # We retrieve the HTML page 549 path = uri.path[5:] 550 to_parse = self._read_url(self.TAGS_URL + uri.path) 551 if not to_parse: 552 ## An Error happend: 553 return list_of_children 554 # create the parser and retrieve the tags 555 parser = StageParser(to_parse) 556 tags = parser.get_tags() 557 558 # We add to the children list a MediaUri representing each tag 559 for tag in tags: 560 t = MediaUri("stage6:///videos/tag:%s%s" % (tag,path)) 561 t.label = tag 562 563 if add_info: 564 list_of_children.append((t, {})) 565 else: 566 list_of_children.append(t) 567 # Cache the uri 568 self._add_to_cache(uri, t, {}) 569 570 elif uri.path == "/me": 571 url = "http://stage6.divx.com/user/any/watchlist/" 572 to_parse = self._read_url(url) 573 if not to_parse: 574 ### An Error happend: 575 return list_of_children 576 577 parser = StageParser(to_parse) 578 list = parser.get_watchlist() 579 for item in list: 580 uri = MediaUri("stage6://%s" % item['href']) 581 uri.label = item['label'] 582 img = MediaUri(item['img']) 583 
list_of_children.append( (uri, {'default_image' : img})) 584 elif uri.path.startswith('/pager'): 585 pages = int(uri.get_param('pages', 0)) 586 # FIXME: how could this be done i18n ? 587 pages_string = "Page %%%d.d" % len(str(pages)) 588 path = uri.path[7:] 589 page_n = 2 590 while page_n <= pages: 591 page = MediaUri("stage6:///%s?page=%s" % 592 (path, page_n)) 593 page.label = pages_string % page_n 594 list_of_children.append((page, {})) 595 page_n +=1 596 else: 597 path = uri.path[1:] # Remove first slash 598 page = uri.get_param('page', 1) 599 url_path = "%s%s?page=%s" % (self.MAIN_URL, path, page) 600 self.debug("Asking stage6 for: %s" % url_path) 601 # download HTML page and parse it to retrieve the video list 602 to_parse = self._read_url(url_path) 603 if not to_parse: 604 self.warning("Reading didn't work") 605 ### Error: 606 return list_of_children 607 parser = StageParser(to_parse) 608 videos = parser.get_videos() 609 self.debug("Found %s videos on that page" % len(videos)) 610 ## uncomment this line an comment the line beneath it, and the 611 ## paging is enabled 612 pages = 0 613 if self._more_pages: 614 pages = parser.get_pages() 615 616 # We add to the children list a MediaUri representing each video 617 for v in videos: 618 t = MediaUri(self.VIDEOS_URL + v[0] + "/.avi") 619 label = v[1].decode("utf-8") 620 # set the uri label to the name of the video 621 t.label = unquote(label) 622 if add_info: 623 # Add the thumbnail url to the info dict 624 uri = v[2] 625 d = DictObservable() 626 if uri.startswith('http'): 627 d['default_image'] = MediaUri(uri) 628 if v[3] != None: 629 d['length'] = v[3] 630 list_of_children.append((t, d)) 631 # Cache the uri 632 self._add_to_cache(uri, t, d) 633 else: 634 list_of_children.append(t) 635 # Cache the uri 636 self._add_to_cache(uri, t, {}) 637 638 if pages > 1 and page == 1: 639 if pages > self._max_pages: 640 pages = self._max_pages 641 642 pager = MediaUri("stage6:///pager/%s?pages=%s" 643 %(path, pages)) 644 
pager.label = T_(N_("Other Pages")) 645 list_of_children.append((pager, {})) 646 647 return list_of_children648650 # Same as get_direct_children() except we also fill an information dict 651 return self._def_action.insert_action(0, self._retrieve_children, uri, children_with_info, add_info=True)652 653 656658 if not root: 659 root_str = MediaUri(u'stage6:///') 660 else: 661 root_str = repr(root) 662 663 to_find = repr(uri) 664 # is it cached ? 665 if self._cache.has_key(root_str): 666 for child, children in self._cache.iteritems(): 667 # look if it is a child of root 668 if child.startswith(root_str): 669 i = 0 670 while i < len(self._cache[child]): 671 # Is that our uri ? 672 if to_find == self._cache[child][i]: 673 # Check if there is a uri following 674 i += 1 675 if i < len(self._cache[child]): 676 # if yes, returns it 677 return MediaUri(self._cache[child][i]) 678 break 679 i += 1 680 681 return None682 685 689 693695 # We cannot open 'tags' 696 if self.is_directory(uri): 697 return None 698 699 # What we do here is convert the uri in its http form, 700 # and ask the media_manager to provide a suitable component 701 # - such as GnomeVFSProvider - to do the work for us 702 uri = self.get_real_uri(uri) 703 media_manager = common.application.media_manager 704 if media_manager.enabled: 705 media = media_manager.open(uri, mode, block) 706 else: 707 media = None 708 return media
Home | Trees | Indices | Help |
---|
Generated by Epydoc 3.0beta1 on Wed Jan 16 19:10:08 2008 | http://epydoc.sourceforge.net |