Home | Trees | Indices | Help |
---|
|
1 # -*- coding: utf-8 -*- 2 # Elisa - Home multimedia server 3 # Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com). 4 # All rights reserved. 5 # 6 # This file is available under one of two license agreements. 7 # 8 # This file is licensed under the GPL version 3. 9 # See "LICENSE.GPL" in the root of this distribution including a special 10 # exception to use Elisa with Fluendo's plugins. 11 # 12 # The GPL part of Elisa is also available under a commercial licensing 13 # agreement from Fluendo. 14 # See "LICENSE.Elisa" in the root directory of this distribution package 15 # for details on that license. 16 17 """ 18 Metadata crawler 19 """ 20 21 __maintainer__ = 'Philippe Normand <philippe@fluendo.com>' 22 __maintainer1__ = 'Alessandro Decina <alessandro@fluendo.com>' 23 24 from elisa.core import log 25 from elisa.core import common 26 from elisa.core.media_uri import MediaUri 27 from elisa.extern import enum 28 from elisa.core.utils import classinit 29 30 import os 31 import time 32 from twisted.internet import reactor, defer, task 335235 - def __init__(self, source_uri, source_id, 36 current_uri=None, current_metadata=None, 37 requested_media_types=None):38 self.defer = defer.Deferred() 39 self.source_uri = source_uri 40 self.source_id = source_id 41 if current_uri is None: 42 current_uri = source_uri 43 self.current_uri = current_uri 44 if current_metadata is None: 45 current_metadata = {} 46 self.current_metadata = current_metadata 47 self.requested_media_types = requested_media_types4854 """ 55 Scan media sources extracting metadata and populating the 56 L{elisa.core.media_db.MediaDB}. 57 58 The MediaScanner crawls sources looking for resources from which to get 59 metadata. Static source locations can be specified in the configuration 60 file. Sources can also be added, removed and updated at runtime with 61 L{elisa.core.media_scanner.MediaScanner.add_source}, 62 L{elisa.core.media_scanner.MediaScanner.remove_source} and 63 L{elisa.core.media_scanner.MediaScanner.update_source). 64 65 Sources are periodically rescanned to keep the 66 L{elisa.core.media_db.MediaDB} up to date. Monitorable sources are 67 automatically watched for metadata changes so they need not be rescanned. 68 FIXME: monitoring is not implemented currently. 69 """ 70 71 # Allows property fget/fset/fdel/doc overriding 72 __metaclass__ = classinit.ClassInitMeta 73 __classinit__ = classinit.build_properties 74 75 default_config = {'db_backend': 'sqlite', 76 'database': 'elisa.db', 77 'username': '', 78 'password': '', 79 'hostname': '', 80 'scan_interval': '0.01', 81 'commit_interval': '5' 82 } 83 84 85 min_commit_interval = 1 86 min_scan_interval = 0.01 87304 305 # eat the error... 306 307 self.info("Scheduling update of %r", source_uri) 308 source_info = self._media_manager.get_source_for_uri(source_uri) 309 item = MediaScannerItem(source_uri, source_info.id, 310 current_uri, None, requested_media_types) 311 item.defer.addCallback(source_update_done) 312 item.defer.addErrback(source_update_failure) 313 self._enqueue(item) 314 315 if self._started: 316 self._ensure_running() 317 318 return item.defer 31989 """ 90 Create the L{elisa.core.media_scanner.MediaScanner}. 91 92 @param media_manager: the C{media_manager} used to access media 93 sources 94 @type media_manager: L{elisa.core.media_manager.MediaManager} 95 @param metadata_manager: the C{metadata_manager} used to retrieve 96 metadata about a specific URI 97 @param metadata_manager: L{elisa.core.metadata_manager.MetadataManager} 98 """ 99 log.Loggable.__init__(self) 100 self.debug("Creating") 101 102 if common.application: 103 app_config = common.application.config 104 self._config = app_config.get_section('media_scanner', 105 self.default_config) 106 else: 107 self._config = self.default_config 108 109 self._queue = [] 110 self._delayed_start = 0 111 self._media_manager = media_manager 112 self._metadata_manager = metadata_manager 113 114 self._started = False 115 self._running = False 116 117 self._fivemin_locations = self._get_option('fivemin_location_updates',[]) 118 self._hourly_locations = self._get_option('hourly_location_updates',[]) 119 self._daily_locations = self._get_option('daily_location_updates',[]) 120 self._weekly_locations = self._get_option('weekly_location_updates',[]) 121 self._unmonitored_locations = self._get_option('unmonitored_locations',[]) 122 self._ignored_locations = [ unicode(MediaUri(l)) 123 for l in self._get_option('ignored_locations',[])] 124 self._update_intervals = dict(fivemin=60*5, 125 hour = 60*60, day = 60* 60 * 24, 126 week = 60 * 60 * 24 * 7) 127 self._interval_locations = {'fivemin': self._fivemin_locations, 128 'hour': self._hourly_locations, 129 'day': self._daily_locations, 130 'week': self._weekly_locations} 131 try: 132 interval = float(self._get_option('scan_interval')) 133 self._scan_interval = max(interval, self.min_scan_interval) 134 except ValueError: 135 self._scan_interval = float(self.default_config['scan_interval']) 136 137 try: 138 interval = int(self._get_option('commit_interval')) 139 self._commit_interval = max(interval, self.min_commit_interval) 140 except ValueError: 141 self._commit_interval = int(self.default_config['commit_interval']) 142 143 self._source_scan_start = {} 144 self._delayed_calls = {} 145 self._call_later_next = None 146 self._commit_call = None 147 self._commit = False148150 if default is None: 151 default = self.default_config.get(name) 152 153 return self._config.get(name, default)154 157159 self.debug("Saving config") 160 self._set_option('fivemin_location_updates', self._fivemin_locations) 161 self._set_option('hourly_location_updates', self._hourly_locations) 162 self._set_option('daily_location_updates',self._daily_locations) 163 self._set_option('weekly_location_updates',self._weekly_locations) 164 self._set_option('unmonitored_locations',self._unmonitored_locations) 165 166 common.application.config.set_section('media_scanner', self._config)167 172174 """ 175 Start the scanner. 176 177 @param delayed_start: number of seconds to wait before starting 178 @type delayed_start: int 179 """ 180 if self.enabled: 181 self.debug("Starting") 182 self._started = True 183 self._delayed_start = delayed_start 184 self._schedule_periodic_updates() 185 self._ensure_running()186188 """ 189 Stop the scanner. 190 """ 191 self.info("Stopping") 192 self._started = False 193 194 for label, call in self._delayed_calls.iteritems(): 195 call.cancel() 196 197 if self._call_later_next is not None: 198 self._call_later_next.cancel() 199 self._call_later_next = None 200 201 if self._commit_call is not None and self._commit_call.active(): 202 self._commit_call.cancel() 203 self._commit_call = None 204 205 self._checkpoint() 206 207 self._delayed_calls = {} 208 self._save_config()209211 """ 212 Add a new source to the scan queue. 213 214 @param source_uri: The location of the source 215 @type source_uri: L{elisa.core.media_uri.MediaUri} 216 @param media_types: a sequence of media types to scan on this source eg: 217 ('audio', 'image') or None 218 @type media_types: sequence 219 @returns: defer called when the source scan is complete 220 @rtype: L{twisted.internet.defer.Deferred} 221 """ 222 if self.enabled: 223 return self._schedule_source_update(source_uri, 224 requested_media_types=media_types) 225 else: 226 return defer.succeed([])227229 if self.enabled: 230 return self._schedule_source_update(source_uri, uri, media_types) 231 else: 232 return defer.succeed(None)233235 """ 236 Mark a source as unavailable in the database and stop monitoring it. 237 238 @param source_uri: The location of the source 239 @type source_uri: L{elisa.core.media_uri.MediaUri} 240 """ 241 # TODO: cancel an eventual update of the source 242 pass243 247249 """ 250 Schedule a new scan of the source located at given uri. 251 252 @param source_uri: The location of the source 253 @type source_uri: L{elisa.core.media_uri.MediaUri} 254 @returns: defer 255 @rtype: L{twisted.internet.defer.Deferred} called when the 256 257 source update is complete 258 """ 259 if self.enabled: 260 return self._schedule_source_update(source_uri) 261 else: 262 return defer.succeed([])263 264265 - def _schedule_source_update(self, source_uri, current_uri=None, 266 requested_media_types=None):267 if source_uri in self._ignored_locations: 268 self.info('Ignoring update of %r', source_uri) 269 # FIXME: define an exception to return here 270 return defer.succeed('ignored') 271 272 if not self._media_manager.is_scannable(source_uri): 273 self.info('%r is not scannable', source_uri) 274 # FIXME: define an exception to return here 275 return defer.succeed('not scannable') 276 277 if source_uri in self._source_scan_start.iterkeys(): 278 self.info('Source %r is already being updated', source_uri) 279 return defer.succeed('updating already') 280 281 def source_update_done(children_update_info): 282 t1 = time.time() 283 t0 = self._source_scan_start.pop(source_uri) 284 media_manager = self._media_manager 285 286 delta = t1 - t0 287 count = media_manager.media_db.get_files_count_for_source_uri(source_uri) 288 if count: 289 speed = "(%s s/file)" % (delta / count,) 290 else: 291 speed = "" 292 msg = 'Parse of %s took %s seconds %s' % (source_uri, delta, speed) 293 294 self.info(msg) 295 296 return children_update_info297 298 def source_update_failure(reason): 299 # FIXME: this is called when scanning an item in the source fails. 300 # We just log and skip the error for the moment, but this should be 301 # properly handled by upper layers 302 self.warning('scanning of an item of %s failed: %s', 303 source_uri, reason)321 was_running = self._running 322 self._running = True 323 if not was_running: 324 # start now if we aren't already scanning 325 self._process_next(delayed=True)326328 self.log("enqueing %s", item) 329 self._queue.insert(0, item) 330 self.log("%r items in queue", len(self._queue))331 338340 assert self._call_later_next is None 341 # do the real work in delayed calls so that we a) avoid recursion and b) 342 # let other reactor sources do their work while we scan 343 if seconds is None: 344 seconds = self._scan_interval 345 346 self._call_later_next = reactor.callLater(seconds, self._process_next_real, delayed)347 351353 self._call_later_next = None 354 if not self._started: 355 self.debug('stopping _process_next, scanner stopped') 356 return 357 358 if delayed and self._delayed_start: 359 self.debug("Delayed start in %s seconds", self._delayed_start) 360 self._delayed_start -= 1 361 self._process_next(1, delayed) 362 return 363 364 if self._commit_call is None or not self._commit_call.active(): 365 self._commit_call = \ 366 reactor.callLater(self._commit_interval, self._checkpoint) 367 368 item = self._dequeue() 369 if not item: 370 if self._commit_call is not None and self._commit_call.active(): 371 self._commit_call.cancel() 372 self._commit_call = None 373 374 self._checkpoint() 375 self.debug('scanner queue empty') 376 self._running = False 377 return 378 379 if item.source_uri == item.current_uri: 380 # record a timestamp so we can report roughly how long it took to 381 # scan this source 382 self._source_scan_start[item.source_uri] = time.time() 383 384 try: 385 self._process(item) 386 except Exception, exc: 387 self.warning("Source scanning failed: %s" % exc) 388 raise389391 # schedule some events 392 for label, interval in self._update_intervals.iteritems(): 393 call = reactor.callLater(interval, self._periodic_update, label) 394 self._delayed_calls[label] = call395397 # this call is done, we will readd it later 398 del self._delayed_calls[interval_label] 399 400 interval = self._update_intervals.get(interval_label) 401 sources = self._interval_locations.get(interval_label) 402 if sources: 403 msg = "Launching scheduled update of the %s: %s" % (interval_label, 404 sources) 405 self.info(msg) 406 407 def periodic_update_source_done(result): 408 call = reactor.callLater(interval, 409 self._periodic_update, interval_label) 410 self._delayed_calls[interval_label] = call 411 412 return result413 414 def periodic_update_source_failed(result, source_uri): 415 # FIXME: log a warning 416 locations = self._interval_locations.get(interval_label) 417 locations.remove(source_uri) 418 419 # swallow the error and continue in periodic_update_source_done 420 # so we readd the call 421 422 for source_uri in sources: 423 dfr = self.update_source(MediaUri(source_uri)) 424 dfr.addErrback(periodic_update_source_failed, source_uri) 425 dfr.addCallback(periodic_update_source_done) 426428 source_uri = item.source_uri 429 source_id = item.source_id 430 current_uri = item.current_uri 431 requested_media_types = item.requested_media_types 432 433 self.debug("Processing source %r uri %r", source_uri, current_uri) 434 435 def is_directory_done(is_directory): 436 if is_directory: 437 self.debug("%r has children", current_uri) 438 def get_direct_children_done(children): 439 # FIXME: iterating all the children here is likely to be slow, 440 # what we need is a better interface to just get the *next* 441 # children 442 443 self.debug('got children for %s: %s', source_uri, children) 444 445 def iterate_over_children_iter(children, children_deferreds): 446 for child in children: 447 uri, metadata = child 448 449 try: 450 current_mtime = metadata['fs_mtime'] 451 except KeyError: 452 pass 453 else: 454 media_db = self._media_manager.media_db 455 info = media_db.get_media_information(uri, 456 extended=False) 457 if info and info.fs_mtime is not None and \ 458 info.fs_mtime == current_mtime: 459 self.debug("skipping %s not modified " 460 "since last scan", uri) 461 self._call_later_delay = 0.0 462 yield None 463 continue 464 465 child_item = MediaScannerItem(source_uri, source_id, 466 uri, metadata, requested_media_types) 467 children_deferreds.append(child_item.defer) 468 self.debug("%r adding %r", source_uri, uri) 469 self._enqueue(child_item) 470 471 yield None472 473 def iterate_over_children_done(iterator, children_deferreds): 474 # item.defer is the deferred of the parent dir, we want to 475 # call it back only after having scanned all the children 476 dfr = defer.DeferredList(children_deferreds, consumeErrors=1) 477 dfr.chainDeferred(item.defer) 478 self._process_next() 479 480 children_deferreds = [] 481 dfr = task.coiterate(iterate_over_children_iter(children, 482 children_deferreds)) 483 dfr.addCallback(iterate_over_children_done, 484 children_deferreds) 485 486 def get_direct_children_failure(failure): 487 self.warning('error getting children for %s: %s (skipping)', 488 source_uri, failure) 489 self._process_next() 490 item.defer.errback(failure) 491 492 children_defer = self._media_manager.get_direct_children(current_uri, []) 493 children_defer.addCallback(get_direct_children_done) 494 children_defer.addErrback(get_direct_children_failure) 495 else: 496 self._process_media_file(item) 497 498 def is_directory_failure(failure): 499 self.warning('is_directory failed on uri %s: %s (skipping)', 500 current_uri, failure) 501 502 is_directory_defer = self._media_manager.is_directory(current_uri) 503 is_directory_defer.addCallback(is_directory_done) 504 is_directory_defer.addErrback(is_directory_failure) 505507 source = item.source_uri 508 source_id = item.source_id 509 uri = item.current_uri 510 metadata = item.current_metadata 511 requested_media_types = item.requested_media_types 512 513 def get_metadata_done(res_metadata): 514 self.debug('finished scan of %s', uri) 515 516 file_type = res_metadata['file_type'] 517 518 if requested_media_types and file_type not in requested_media_types: 519 self.debug('ignoring %s, media type %s, requested %s' % 520 (uri, file_type, requested_media_types)) 521 self._process_next() 522 item.defer.errback('not interesting') 523 return res_metadata 524 525 db_metadata = {} 526 if file_type == 'audio': 527 for key in ('artist', 'album', 'song', 'track'): 528 try: 529 db_metadata[key] = res_metadata[key] 530 except KeyError: 531 pass 532 533 default_image = res_metadata['default_image'] 534 if default_image: 535 # FIXME: this is a hack to avoid saving covers 536 # coming from Amazon in db... 537 home = os.path.expanduser('~/.elisa') 538 amazon_cache = os.path.join(home, 'amazon_cache') 539 if not default_image.path.startswith(amazon_cache): 540 db_metadata['cover_uri'] = default_image 541 542 if file_type in ('audio', 'video', 'image'): 543 media_db = self._media_manager.media_db 544 media_db.add_media(uri, uri.label, source_id, 'file', 545 format=file_type, typ=res_metadata['mime_type'], 546 fs_mtime=res_metadata['fs_mtime'], metadata=db_metadata) 547 self._call_later_delay = 0.3 548 549 # continue with the next file 550 self._process_next() 551 item.defer.callback(res_metadata) 552 553 return res_metadata554 555 def get_metadata_failure(failure): 556 self.debug('error scanning %s: %s', uri, failure) 557 self._process_next() 558 item.defer.errback(failure) 559 560 self.debug('starting scan of %s' % uri) 561 562 metadata['uri'] = uri 563 for tag in ('content-type', 'file_type', 'mime_type', 'default_image', 564 'artist', 'album', 'song', 'track'): 565 if not metadata.has_key(tag): 566 metadata[tag] = None 567 568 get_metadata_defer = \ 569 self._metadata_manager.get_metadata(metadata, low_priority=True) 570 get_metadata_defer.addCallback(get_metadata_done) 571 get_metadata_defer.addErrback(get_metadata_failure) 572
Home | Trees | Indices | Help |
---|
Generated by Epydoc 3.0beta1 on Wed Jan 16 19:10:06 2008 | http://epydoc.sourceforge.net |