Package CedarBackup2 :: Module util
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.util

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008,2010 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # Portions copyright (c) 2001, 2002 Python Software Foundation. 
  15  # All Rights Reserved. 
  16  # 
  17  # This program is free software; you can redistribute it and/or 
  18  # modify it under the terms of the GNU General Public License, 
  19  # Version 2, as published by the Free Software Foundation. 
  20  # 
  21  # This program is distributed in the hope that it will be useful, 
  22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  24  # 
  25  # Copies of the GNU General Public License are available from 
  26  # the Free Software Foundation website, http://www.gnu.org/. 
  27  # 
  28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  29  # 
  30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  31  # Language : Python 2 (>= 2.7) 
  32  # Project  : Cedar Backup, release 2 
  33  # Purpose  : Provides general-purpose utilities. 
  34  # 
  35  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  36   
  37  ######################################################################## 
  38  # Module documentation 
  39  ######################################################################## 
  40   
  41  """ 
  42  Provides general-purpose utilities. 
  43   
  44  @sort: AbsolutePathList, ObjectTypeList, RestrictedContentList, RegexMatchList, 
  45         RegexList, _Vertex, DirectedGraph, PathResolverSingleton, 
  46         sortDict, convertSize, getUidGid, changeOwnership, splitCommandLine, 
  47         resolveCommand, executeCommand, calculateFileAge, encodePath, nullDevice, 
  48         deriveDayOfWeek, isStartOfWeek, buildNormalizedPath, 
  49         ISO_SECTOR_SIZE, BYTES_PER_SECTOR, 
  50         BYTES_PER_KBYTE, BYTES_PER_MBYTE, BYTES_PER_GBYTE, KBYTES_PER_MBYTE, MBYTES_PER_GBYTE, 
  51         SECONDS_PER_MINUTE, MINUTES_PER_HOUR, HOURS_PER_DAY, SECONDS_PER_DAY, 
  52         UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES, UNIT_SECTORS 
  53   
  54  @var ISO_SECTOR_SIZE: Size of an ISO image sector, in bytes. 
  55  @var BYTES_PER_SECTOR: Number of bytes (B) per ISO sector. 
  56  @var BYTES_PER_KBYTE: Number of bytes (B) per kilobyte (kB). 
  57  @var BYTES_PER_MBYTE: Number of bytes (B) per megabyte (MB). 
  58  @var BYTES_PER_GBYTE: Number of bytes (B) per gigabyte (GB). 
  59  @var KBYTES_PER_MBYTE: Number of kilobytes (kB) per megabyte (MB). 
  60  @var MBYTES_PER_GBYTE: Number of megabytes (MB) per gigabyte (GB). 
  61  @var SECONDS_PER_MINUTE: Number of seconds per minute. 
  62  @var MINUTES_PER_HOUR: Number of minutes per hour. 
  63  @var HOURS_PER_DAY: Number of hours per day. 
  64  @var SECONDS_PER_DAY: Number of seconds per day. 
  65  @var UNIT_BYTES: Constant representing the byte (B) unit for conversion. 
  66  @var UNIT_KBYTES: Constant representing the kilobyte (kB) unit for conversion. 
  67  @var UNIT_MBYTES: Constant representing the megabyte (MB) unit for conversion. 
  68  @var UNIT_GBYTES: Constant representing the gigabyte (GB) unit for conversion. 
  69  @var UNIT_SECTORS: Constant representing the ISO sector unit for conversion. 
  70   
  71  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  72  """ 
  73   
  74   
  75  ######################################################################## 
  76  # Imported modules 
  77  ######################################################################## 
  78   
  79  import sys 
  80  import math 
  81  import os 
  82  import re 
  83  import time 
  84  import logging 
  85  import string  # pylint: disable=W0402 
  86  from subprocess import Popen, STDOUT, PIPE 
  87   
  88  try: 
  89     import pwd 
  90     import grp 
  91     _UID_GID_AVAILABLE = True 
  92  except ImportError: 
  93     _UID_GID_AVAILABLE = False 
  94   
  95  from CedarBackup2.release import VERSION, DATE 
  96   
  97   
  98  ######################################################################## 
  99  # Module-wide constants and variables 
 100  ######################################################################## 
 101   
 102  logger = logging.getLogger("CedarBackup2.log.util") 
 103  outputLogger = logging.getLogger("CedarBackup2.output") 
 104   
 105  ISO_SECTOR_SIZE    = 2048.0   # in bytes 
 106  BYTES_PER_SECTOR   = ISO_SECTOR_SIZE 
 107   
 108  BYTES_PER_KBYTE    = 1024.0 
 109  KBYTES_PER_MBYTE   = 1024.0 
 110  MBYTES_PER_GBYTE   = 1024.0 
 111  BYTES_PER_MBYTE    = BYTES_PER_KBYTE * KBYTES_PER_MBYTE 
 112  BYTES_PER_GBYTE    = BYTES_PER_MBYTE * MBYTES_PER_GBYTE 
 113   
 114  SECONDS_PER_MINUTE = 60.0 
 115  MINUTES_PER_HOUR   = 60.0 
 116  HOURS_PER_DAY      = 24.0 
 117  SECONDS_PER_DAY    = SECONDS_PER_MINUTE * MINUTES_PER_HOUR * HOURS_PER_DAY 
 118   
 119  UNIT_BYTES         = 0 
 120  UNIT_KBYTES        = 1 
 121  UNIT_MBYTES        = 2 
 122  UNIT_GBYTES        = 4 
 123  UNIT_SECTORS       = 3 
 124   
 125  MTAB_FILE          = "/etc/mtab" 
 126   
 127  MOUNT_COMMAND      = [ "mount", ] 
 128  UMOUNT_COMMAND     = [ "umount", ] 
 129   
 130  DEFAULT_LANGUAGE   = "C" 
 131  LANG_VAR           = "LANG" 
 132  LOCALE_VARS        = [ "LC_ADDRESS", "LC_ALL", "LC_COLLATE", 
 133                         "LC_CTYPE", "LC_IDENTIFICATION", 
 134                         "LC_MEASUREMENT", "LC_MESSAGES", 
 135                         "LC_MONETARY", "LC_NAME", "LC_NUMERIC", 
 136                         "LC_PAPER", "LC_TELEPHONE", "LC_TIME", ] 


########################################################################
# UnorderedList class definition
########################################################################

class UnorderedList(list):

   """
   List subclass whose comparison operators ignore element order.

   Only the contents of an "unordered list" matter, not the positions at
   which they appear.  This is convenient when, say, a set of paths is kept
   in a list and two such lists should compare equal whenever they hold
   exactly the same members.

   Every comparison is carried out on sorted copies of both operands, so
   neither operand is modified.  Sorting is used rather than converting to
   sets because a set would lose information about duplicates.  The approach
   relies on two lists with exactly the same members always sorting into the
   same order, and it is slower than a plain list comparison, so it is best
   kept to small lists.

   The C{__eq__}, C{__ne__}, C{__ge__}, C{__gt__}, C{__le__} and C{__lt__}
   methods are overridden; each one returns whatever the corresponding
   C{list} method returns for the sorted copies.  A C{None} operand is
   handled before any copying: it is unequal to, and ordered below, any
   list.
   """

   def _sortedCopies(self, other):
      """
      Builds sorted copies of both operands of a comparison.
      @param other: Other object to compare to; it is sliced and then sorted.
      @return: Tuple of C{(sorted copy of self, sorted copy of other)}.
      """
      mine = self[:]
      theirs = other[:]
      mine.sort()
      theirs.sort()
      return (mine, theirs)

   def __eq__(self, other):
      """
      Definition of C{==} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self == other}.
      """
      if other is None:
         return False
      (mine, theirs) = self._sortedCopies(other)
      return mine.__eq__(theirs)

   def __ne__(self, other):
      """
      Definition of C{!=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self != other}.
      """
      if other is None:
         return True
      (mine, theirs) = self._sortedCopies(other)
      return mine.__ne__(theirs)

   def __ge__(self, other):
      """
      Definition of C{>=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self >= other}.
      """
      if other is None:
         return True
      (mine, theirs) = self._sortedCopies(other)
      return mine.__ge__(theirs)

   def __gt__(self, other):
      """
      Definition of C{>} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self > other}.
      """
      if other is None:
         return True
      (mine, theirs) = self._sortedCopies(other)
      return mine.__gt__(theirs)

   def __le__(self, other):
      """
      Definition of C{<=} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self <= other}.
      """
      if other is None:
         return False
      (mine, theirs) = self._sortedCopies(other)
      return mine.__le__(theirs)

   def __lt__(self, other):
      """
      Definition of C{<} operator for this class.
      @param other: Other object to compare to.
      @return: True/false depending on whether C{self < other}.
      """
      if other is None:
         return False
      (mine, theirs) = self._sortedCopies(other)
      return mine.__lt__(theirs)



########################################################################
# AbsolutePathList class definition
########################################################################

class AbsolutePathList(UnorderedList):

   """
   Class representing a list of absolute paths.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is an absolute path.

   Each item added to the list is encoded using L{encodePath}.  If we don't do
   this, we have problems trying certain operations between strings and
   unicode objects, particularly for "odd" filenames that can't be encoded in
   standard ASCII.
   """

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Path to add to the end of the list.
      @raise ValueError: If item is not an absolute path.
      """
      if not os.path.isabs(item):
         raise ValueError("Not an absolute path: [%s]" % item)
      list.append(self, encodePath(item))

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which the path is inserted.
      @param item: Path to insert.
      @raise ValueError: If item is not an absolute path.
      """
      if not os.path.isabs(item):
         raise ValueError("Not an absolute path: [%s]" % item)
      list.insert(self, index, encodePath(item))

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      All items are validated before any of them is added, so a failed call
      leaves the list unchanged.

      @param seq: Iterable of paths to add; one-shot iterators are supported.
      @raise ValueError: If any item is not an absolute path.
      """
      # Take a snapshot first: seq is walked more than once below, and a
      # generator or other one-shot iterator would be empty the second time.
      items = list(seq)
      for item in items:
         if not os.path.isabs(item):
            raise ValueError("Not an absolute path: [%s]" % item)
      list.extend(self, [ encodePath(item) for item in items ])



########################################################################
# ObjectTypeList class definition
########################################################################

class ObjectTypeList(UnorderedList):

   """
   Class representing a list containing only objects with a certain type.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list matches the type that is requested.  The
   comparison uses the built-in C{isinstance}, which should allow subclasses
   of the requested type to be added to the list as well.

   The C{objectName} value will be used in exceptions, i.e. C{"Item must be a
   CollectDir object."} if C{objectName} is C{"CollectDir"}.
   """

   def __init__(self, objectType, objectName):
      """
      Initializes a typed list for a particular type.
      @param objectType: Type that the list elements must match.
      @param objectName: Short string containing the "name" of the type.
      """
      super(ObjectTypeList, self).__init__()
      self.objectType = objectType
      self.objectName = objectName

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Object to add to the end of the list.
      @raise ValueError: If item does not match requested type.
      """
      if not isinstance(item, self.objectType):
         raise ValueError("Item must be a %s object." % self.objectName)
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which the object is inserted.
      @param item: Object to insert.
      @raise ValueError: If item does not match requested type.
      """
      if not isinstance(item, self.objectType):
         raise ValueError("Item must be a %s object." % self.objectName)
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      All items are validated before any of them is added, so a failed call
      leaves the list unchanged.

      @param seq: Iterable of objects to add; one-shot iterators are supported.
      @raise ValueError: If any item does not match requested type.
      """
      # Take a snapshot first: validating a generator or other one-shot
      # iterator would exhaust it, leaving nothing to add afterwards.
      items = list(seq)
      for item in items:
         if not isinstance(item, self.objectType):
            raise ValueError("All items must be %s objects." % self.objectName)
      list.extend(self, items)



########################################################################
# RestrictedContentList class definition
########################################################################

class RestrictedContentList(UnorderedList):

   """
   Class representing a list containing only objects with certain values.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is among the valid values.  We use a standard
   comparison, so pretty much anything can be in the list of valid values.

   The C{valuesDescr} value will be used in exceptions, i.e. C{"Item must be
   one of the values in VALID_ACTIONS."} if C{valuesDescr} is
   C{"VALID_ACTIONS"}.

   @note: This class doesn't make any attempt to trap for nonsensical
   arguments.  All of the values in the values list should be of the same type
   (i.e. strings).  Then, all list operations also need to be of that type
   (i.e. you should always insert or append just strings).  If you mix types --
   for instance lists and strings -- you will likely see AttributeError
   exceptions or other problems.
   """

   def __init__(self, valuesList, valuesDescr, prefix=None):
      """
      Initializes a list restricted to containing certain values.
      @param valuesList: List of valid values.
      @param valuesDescr: Short string describing list of values.
      @param prefix: Prefix to use in error messages (None results in prefix "Item")
      """
      super(RestrictedContentList, self).__init__()
      self.prefix = "Item"
      if prefix is not None:
         self.prefix = prefix
      self.valuesList = valuesList
      self.valuesDescr = valuesDescr

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Value to add to the end of the list.
      @raise ValueError: If item is not in the values list.
      """
      if item not in self.valuesList:
         raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which the value is inserted.
      @param item: Value to insert.
      @raise ValueError: If item is not in the values list.
      """
      if item not in self.valuesList:
         raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      All items are validated before any of them is added, so a failed call
      leaves the list unchanged.

      @param seq: Iterable of values to add; one-shot iterators are supported.
      @raise ValueError: If any item is not in the values list.
      """
      # Take a snapshot first: validating a generator or other one-shot
      # iterator would exhaust it, leaving nothing to add afterwards.
      items = list(seq)
      for item in items:
         if item not in self.valuesList:
            raise ValueError("%s must be one of the values in %s." % (self.prefix, self.valuesDescr))
      list.extend(self, items)



########################################################################
# RegexMatchList class definition
########################################################################

class RegexMatchList(UnorderedList):

   """
   Class representing a list containing only strings that match a regular expression.

   If C{emptyAllowed} is passed in as C{False}, then empty strings are
   explicitly disallowed, even if they happen to match the regular expression.
   (C{None} values are always disallowed, since string operations are not
   permitted on C{None}.)

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list matches the indicated regular expression.  The
   expression is applied with C{search}, so it may match anywhere in the item
   unless the expression itself is anchored.

   @note: If you try to put values that are not strings into the list, you will
   likely get either TypeError or AttributeError exceptions as a result.
   """

   def __init__(self, valuesRegex, emptyAllowed=True, prefix=None):
      """
      Initializes a list restricted to containing certain values.
      @param valuesRegex: Regular expression that must be matched, as a string
      @param emptyAllowed: Indicates whether empty strings are allowed (C{None} is never allowed).
      @param prefix: Prefix to use in error messages (None results in prefix "Item")
      """
      super(RegexMatchList, self).__init__()
      self.prefix = "Item"
      if prefix is not None:
         self.prefix = prefix
      self.valuesRegex = valuesRegex
      self.emptyAllowed = emptyAllowed
      self.pattern = re.compile(self.valuesRegex)

   def _checkItem(self, item):
      """
      Validates a single candidate item; shared by all of the mutators so
      they apply the same rules and report the same messages.
      @param item: Candidate item.
      @raise ValueError: If item is None
      @raise ValueError: If item is empty and empty values are not allowed
      @raise ValueError: If item does not match the configured regular expression
      """
      if item is None or (not self.emptyAllowed and item == ""):
         raise ValueError("%s cannot be empty." % self.prefix)
      if not self.pattern.search(item):
         raise ValueError("%s is not valid: [%s]" % (self.prefix, item))

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: String to add to the end of the list.
      @raise ValueError: If item is None
      @raise ValueError: If item is empty and empty values are not allowed
      @raise ValueError: If item does not match the configured regular expression
      """
      self._checkItem(item)
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which the string is inserted.
      @param item: String to insert.
      @raise ValueError: If item is None
      @raise ValueError: If item is empty and empty values are not allowed
      @raise ValueError: If item does not match the configured regular expression
      """
      self._checkItem(item)
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      All items are validated before any of them is added, so a failed call
      leaves the list unchanged.

      @param seq: Iterable of strings to add; one-shot iterators are supported.
      @raise ValueError: If any item is None
      @raise ValueError: If any item is empty and empty values are not allowed
      @raise ValueError: If any item does not match the configured regular expression
      """
      # Take a snapshot first: validating a generator or other one-shot
      # iterator would exhaust it, leaving nothing to add afterwards.
      items = list(seq)
      for item in items:
         self._checkItem(item)
      list.extend(self, items)



########################################################################
# RegexList class definition
########################################################################

class RegexList(UnorderedList):

   """
   Class representing a list of valid regular expression strings.

   This is an unordered list.

   We override the C{append}, C{insert} and C{extend} methods to ensure that
   any item added to the list is a valid regular expression, i.e. one that
   C{re.compile} accepts.  The items themselves are stored unchanged; the
   compiled form is discarded.
   """

   @staticmethod
   def _checkRegex(item):
      """
      Validates a single candidate regular expression.
      @param item: Candidate regular expression, as a string.
      @raise ValueError: If item is not a valid regular expression.
      """
      try:
         re.compile(item)
      except re.error:
         raise ValueError("Not a valid regular expression: [%s]" % item)

   def append(self, item):
      """
      Overrides the standard C{append} method.
      @param item: Regular expression to add to the end of the list.
      @raise ValueError: If item is not a valid regular expression.
      """
      self._checkRegex(item)
      list.append(self, item)

   def insert(self, index, item):
      """
      Overrides the standard C{insert} method.
      @param index: Position at which the regular expression is inserted.
      @param item: Regular expression to insert.
      @raise ValueError: If item is not a valid regular expression.
      """
      self._checkRegex(item)
      list.insert(self, index, item)

   def extend(self, seq):
      """
      Overrides the standard C{extend} method.

      All items are validated before any of them is added, so a failed call
      leaves the list unchanged.

      @param seq: Iterable of regular expressions to add; one-shot iterators are supported.
      @raise ValueError: If any item is not a valid regular expression.
      """
      # Take a snapshot first: seq is walked more than once below, and a
      # generator or other one-shot iterator would be empty the second time.
      items = list(seq)
      for item in items:
         self._checkRegex(item)
      list.extend(self, items)



########################################################################
# Directed graph implementation
########################################################################

class _Vertex(object):

   """
   Represents a vertex (or node) in a directed graph.

   @ivar name: Name of the vertex (C{None} only for a graph's start vertex).
   @ivar endpoints: Vertices reachable from this one by a single edge.
   @ivar state: Search state, assigned by L{DirectedGraph} while sorting.
   """

   def __init__(self, name):
      """
      Constructor.
      @param name: Name of this graph vertex.
      @type name: String value.
      """
      self.name = name
      self.endpoints = []
      self.state = None


class DirectedGraph(object):

   """
   Represents a directed graph.

   A graph B{G=(V,E)} consists of a set of vertices B{V} together with a set
   B{E} of vertex pairs or edges.  In a directed graph, each edge also has an
   associated direction (from vertex B{v1} to vertex B{v2}).  A C{DirectedGraph}
   object provides a way to construct a directed graph and execute a depth-
   first search.

   This data structure was designed based on the graphing chapter in
   U{The Algorithm Design Manual<http://www2.toki.or.id/book/AlgDesignManual/>},
   by Steven S. Skiena.

   This class is intended to be used by Cedar Backup for dependency ordering.
   Because of this, it's not quite general-purpose.  Unlike a "general" graph,
   every vertex in this graph has at least one edge pointing to it, from a
   special "start" vertex.  This is so no vertices get "lost" either because
   they have no dependencies or because nothing depends on them.
   """

   _UNDISCOVERED = 0
   _DISCOVERED   = 1
   _EXPLORED     = 2

   def __init__(self, name):
      """
      Directed graph constructor.

      @param name: Name of this graph.
      @type name: String value.

      @raise ValueError: If the graph name is C{None} or empty.
      """
      if name is None or name == "":
         raise ValueError("Graph name must be non-empty.")
      self._name = name
      self._vertices = {}
      self._startVertex = _Vertex(None)  # start vertex is only vertex with no name

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "DirectedGraph(%s)" % self.name

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.

      Graphs are ordered by name first and by their vertex mappings second.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      # pylint: disable=W0212
      if other is None:
         return 1
      if self.name != other.name:
         if self.name < other.name:
            return -1
         else:
            return 1
      if self._vertices != other._vertices:
         if self._vertices < other._vertices:
            return -1
         else:
            return 1
      return 0

   def _getName(self):
      """
      Property target used to get the graph name.
      """
      return self._name

   name = property(_getName, None, None, "Name of the graph.")

   def createVertex(self, name):
      """
      Creates a named vertex.
      @param name: vertex name
      @raise ValueError: If the vertex name is C{None} or empty, or is already in use.
      """
      if name is None or name == "":
         raise ValueError("Vertex name must be non-empty.")
      if name in self._vertices:
         # Re-creating a vertex would leave the old one attached to the start
         # vertex but missing from the name map, so its state would never be
         # reset and the next sort would report a cycle that doesn't exist.
         raise ValueError("Vertex [%s] already exists." % name)
      vertex = _Vertex(name)
      self._startVertex.endpoints.append(vertex)  # so every vertex is connected at least once
      self._vertices[name] = vertex

   def createEdge(self, start, finish):
      """
      Adds an edge with an associated direction, from C{start} vertex to C{finish} vertex.
      @param start: Name of start vertex.
      @param finish: Name of finish vertex.
      @raise ValueError: If one of the named vertices is unknown.
      """
      try:
         startVertex = self._vertices[start]
         finishVertex = self._vertices[finish]
      except KeyError as e:
         raise ValueError("Vertex [%s] could not be found." % e)
      startVertex.endpoints.append(finishVertex)

   def topologicalSort(self):
      """
      Implements a topological sort of the graph.

      This method also enforces that the graph is a directed acyclic graph,
      which is a requirement of a topological sort.

      A directed acyclic graph (or "DAG") is a directed graph with no directed
      cycles.  A topological sort of a DAG is an ordering on the vertices such
      that all edges go from left to right.  Only an acyclic graph can have a
      topological sort, but any DAG has at least one topological sort.

      Since a topological sort only makes sense for an acyclic graph, this
      method throws an exception if a cycle is found.

      @note: If a particular vertex has no edges, then its position in the
      final list depends on the order in which the vertices were created in the
      graph.  If you're using this method to determine a dependency order, this
      makes sense: a vertex with no dependencies can go anywhere (and will).

      @return: Ordering on the vertices so that all edges go from left to right.

      @raise ValueError: If a cycle is found in the graph.
      """
      ordering = []
      for vertex in self._vertices.values():
         vertex.state = self._UNDISCOVERED
      # The start vertex has an edge to every vertex in the graph, so a single
      # search from it is guaranteed to visit all of them.
      self._topologicalSort(self._startVertex, ordering)
      return ordering

   def _topologicalSort(self, vertex, ordering):
      """
      Recursive depth first search function implementing topological sort.
      @param vertex: Vertex to search
      @param ordering: List of vertices in proper order
      @raise ValueError: If a cycle is found in the graph.
      """
      vertex.state = self._DISCOVERED
      for endpoint in vertex.endpoints:
         if endpoint.state == self._UNDISCOVERED:
            self._topologicalSort(endpoint, ordering)
         elif endpoint.state != self._EXPLORED:
            # The endpoint is still being searched further up the call stack,
            # which means we've followed a path that leads back to it.
            raise ValueError("Cycle found in graph (found '%s' while searching '%s')." % (vertex.name, endpoint.name))
      if vertex.name is not None:
         # A vertex is placed in front of everything reachable from it.
         ordering.insert(0, vertex.name)
      vertex.state = self._EXPLORED



########################################################################
# PathResolverSingleton class definition
########################################################################

class PathResolverSingleton(object):

   """
   Singleton used for resolving executable paths.

   Various functions throughout Cedar Backup (including extensions) need a way
   to resolve the path of executables that they use.  For instance, the image
   functionality needs to find the C{mkisofs} executable, and the Subversion
   extension needs to find the C{svnlook} executable.  Cedar Backup's original
   behavior was to assume that the simple name (C{"svnlook"} or whatever) was
   available on the caller's C{$PATH}, and to fail otherwise.  However, this
   turns out to be less than ideal, since for instance the root user might not
   always have executables like C{svnlook} in its path.

   One solution is to specify a path (either via an absolute path or some sort
   of path insertion or path appending mechanism) that would apply to the
   C{executeCommand()} function.  This is not difficult to implement, but it
   seems like kind of a "big hammer" solution.  Besides that, it might also
   represent a security flaw (for instance, I prefer not to mess with root's
   C{$PATH} on the application level if I don't have to).

   The alternative is to set up some sort of configuration for the path to
   certain executables, i.e. "find C{svnlook} in C{/usr/local/bin/svnlook}" or
   whatever.  This PathResolverSingleton aims to provide a good solution to the
   mapping problem.  Callers of all sorts (extensions or not) can get an
   instance of the singleton.  Then, they call the C{lookup} method to try and
   resolve the executable they are looking for.  Through the C{lookup} method,
   the caller can also specify a default to use if a mapping is not found.
   This way, with no real effort on the part of the caller, behavior can neatly
   degrade to something equivalent to the current behavior if there is no
   special mapping or if the singleton was never initialized in the first
   place.

   Even better, extensions automagically get access to the same resolver
   functionality, and they don't even need to understand how the mapping
   happens.  All extension authors need to do is document what executables
   their code requires, and the standard resolver configuration section will
   meet their needs.

   The class should be initialized once through the constructor somewhere in
   the main routine.  Then, the main routine should call the L{fill} method to
   fill in the resolver's internal structures.  Everyone else who needs to
   resolve a path will get an instance of the class using L{getInstance} and
   will then just call the L{lookup} method.

   @cvar _instance: Holds a reference to the singleton
   @ivar _mapping: Internal mapping from resource name to path.
   """

   _instance = None     # Holds a reference to singleton instance

   class _Helper(object):
      """Helper class to provide a singleton factory method."""
      def __init__(self):
         pass
      def __call__(self, *args, **kw):
         # pylint: disable=W0212,R0201
         # Arguments are accepted but ignored.  The singleton is created on
         # first use; its constructor registers it as _instance.
         if PathResolverSingleton._instance is None:
            obj = PathResolverSingleton()
            PathResolverSingleton._instance = obj
         return PathResolverSingleton._instance

   getInstance = _Helper()    # Method that callers will use to get an instance

   def __init__(self):
      """
      Singleton constructor, which just creates the singleton instance.
      @raise RuntimeError: If the singleton instance already exists.
      """
      if PathResolverSingleton._instance is not None:
         raise RuntimeError("Only one instance of PathResolverSingleton is allowed!")
      PathResolverSingleton._instance = self
      self._mapping = { }

   def lookup(self, name, default=None):
      """
      Looks up name and returns the resolved path associated with the name.

      A successful resolution is logged at debug level; falling back to the
      default is not logged.

      @param name: Name of the path resource to resolve.
      @param default: Default to return if resource cannot be resolved.
      @return: Resolved path associated with name, or default if name can't be resolved.
      """
      if name in self._mapping:
         value = self._mapping[name]
         logger.debug("Resolved command [%s] to [%s].", name, value)
         return value
      return default

   def fill(self, mapping):
      """
      Fills in the singleton's internal mapping from name to resource.

      Any previous mapping is discarded.  The entries are copied, so later
      changes to the caller's dictionary do not affect the resolver.

      @param mapping: Mapping from resource name to path.
      @type mapping: Dictionary mapping name to path, both as strings.
      """
      self._mapping = dict(mapping)

########################################################################
# Pipe class definition
########################################################################

class Pipe(Popen):
   """
   Specialized pipe class for use by C{executeCommand}.

   The L{executeCommand} function needs a specialized way of interacting
   with a pipe.  First, C{executeCommand} only reads from the pipe, and
   never writes to it.  Second, C{executeCommand} needs a way to discard all
   output written to C{stderr}, as a means of simulating the shell
   C{2>/dev/null} construct.

   The child is always started with C{shell=False}, no C{stdin}, and
   C{stdout} connected to a pipe.  C{stderr} is either merged into C{stdout}
   or, when C{ignoreStderr} is set, sent to the null device.
   """
   def __init__(self, cmd, bufsize=-1, ignoreStderr=False):
      """
      Starts the child process.
      @param cmd: Command to execute, as a list of arguments.
      @param bufsize: Buffer size handed through to C{Popen}.
      @param ignoreStderr: Whether C{stderr} should be discarded rather than merged into C{stdout}.
      """
      if not ignoreStderr:
         Popen.__init__(self, shell=False, args=cmd, bufsize=bufsize, stdin=None, stdout=PIPE, stderr=STDOUT)
         return
      devnullFd = os.open(nullDevice(), os.O_RDWR)
      try:
         Popen.__init__(self, shell=False, args=cmd, bufsize=bufsize, stdin=None, stdout=PIPE, stderr=devnullFd)
      finally:
         # Popen does not take ownership of a caller-supplied descriptor.  The
         # child has its own copy by the time the constructor returns (or has
         # failed), so close ours here or it leaks on every call.
         os.close(devnullFd)
864
########################################################################
# Diagnostics class definition
########################################################################

class Diagnostics(object):

   """
   Class holding runtime diagnostic information.

   Diagnostic information is information that is useful to get from users for
   debugging purposes.  I'm consolidating it all here into one object.

   Every value is exposed as a read-only property and is computed each time
   it is read; the object itself holds no state.

   @sort: __init__, __repr__, __str__
   """
   # pylint: disable=R0201

   def __init__(self):
      """
      Constructor for the C{Diagnostics} class.
      """

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "Diagnostics()"

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def getValues(self):
      """
      Get a map containing all of the diagnostic values.
      @return: Map from diagnostic name to diagnostic value.
      """
      return {
         'version': self.version,
         'interpreter': self.interpreter,
         'platform': self.platform,
         'encoding': self.encoding,
         'locale': self.locale,
         'timestamp': self.timestamp,
      }

   def printDiagnostics(self, fd=sys.stdout, prefix=""):
      """
      Pretty-print diagnostic information to a file descriptor.
      @param fd: File descriptor used to print information.
      @param prefix: Prefix string (if any) to place onto printed lines
      @note: The C{fd} is used rather than C{print} to facilitate unit testing.
      """
      for line in self._buildDiagnosticLines(prefix):
         fd.write("%s\n" % line)

   def logDiagnostics(self, method, prefix=""):
      """
      Pretty-print diagnostic information using a logger method.
      @param method: Logger method to use for logging (i.e. logger.info)
      @param prefix: Prefix string (if any) to place onto printed lines
      """
      for line in self._buildDiagnosticLines(prefix):
         method("%s" % line)

   def _buildDiagnosticLines(self, prefix=""):
      """
      Build a set of pretty-printed diagnostic lines.

      Lines are ordered by diagnostic name.  Each title is padded with dots to
      a common width so that the values line up.

      @param prefix: Prefix string (if any) to place onto printed lines
      @return: List of strings, not terminated by newlines.
      """
      values = self.getValues()
      keys = sorted(values)  # sorted() also copes with dictionary views, unlike keys().sort()
      tmax = Diagnostics._getMaxLength(keys) + 3  # three extra dots in output
      lines = []
      for key in keys:
         title = key.title()
         title += (tmax - len(title)) * '.'
         lines.append("%s%s: %s" % (prefix, title, values[key]))
      return lines

   @staticmethod
   def _getMaxLength(values):
      """
      Get the maximum length from among a list of strings.
      @return: Length of the longest string, or zero for an empty list.
      """
      lengths = [ len(value) for value in values ]
      return max(lengths) if lengths else 0

   def _getVersion(self):
      """
      Property target to get the Cedar Backup version.
      """
      return "Cedar Backup %s (%s)" % (VERSION, DATE)

   def _getInterpreter(self):
      """
      Property target to get the Python interpreter version.
      """
      version = sys.version_info
      return "Python %d.%d.%d (%s)" % (version[0], version[1], version[2], version[3])

   def _getEncoding(self):
      """
      Property target to get the filesystem encoding.
      """
      return sys.getfilesystemencoding() or sys.getdefaultencoding()

   def _getPlatform(self):
      """
      Property target to get the operating system platform.

      Falls back to the bare C{sys.platform} value if the detailed
      information cannot be gathered.
      """
      try:
         if sys.platform.startswith("win"):
            windowsPlatforms = [ "Windows 3.1", "Windows 95/98/ME", "Windows NT/2000/XP", "Windows CE", ]
            wininfo = sys.getwindowsversion()  # pylint: disable=E1101
            winversion = "%d.%d.%d" % (wininfo[0], wininfo[1], wininfo[2])
            winplatform = windowsPlatforms[wininfo[3]]
            wintext = wininfo[4]  # i.e. "Service Pack 2"
            return "%s (%s %s %s)" % (sys.platform, winplatform, winversion, wintext)
         else:
            uname = os.uname()
            sysname = uname[0]  # i.e. Linux
            release = uname[2]  # i.e. 2.16.18-2
            machine = uname[4]  # i.e. i686
            return "%s (%s %s %s)" % (sys.platform, sysname, release, machine)
      except Exception:  # diagnostics must never fail, but don't swallow KeyboardInterrupt/SystemExit
         return sys.platform

   def _getLocale(self):
      """
      Property target to get the default locale that is in effect.
      """
      try:
         import locale
         return locale.getdefaultlocale()[0]
      except Exception:  # best-effort only; see _getPlatform
         return "(unknown)"

   def _getTimestamp(self):
      """
      Property target to get a current date/time stamp.
      """
      try:
         import datetime
         return datetime.datetime.utcnow().ctime() + " UTC"
      except Exception:  # best-effort only; see _getPlatform
         return "(unknown)"

   version = property(_getVersion, None, None, "Cedar Backup version.")
   interpreter = property(_getInterpreter, None, None, "Python interpreter version.")
   platform = property(_getPlatform, None, None, "Platform identifying information.")
   encoding = property(_getEncoding, None, None, "Filesystem encoding that is in effect.")
   locale = property(_getLocale, None, None, "Locale that is in effect.")
   timestamp = property(_getTimestamp, None, None, "Current timestamp.")
1030
########################################################################
# General utility functions
########################################################################

######################
# sortDict() function
######################

def sortDict(d):
   """
   Returns the keys of the dictionary sorted by value.

   The sort is stable: keys whose values compare equal keep the relative
   order in which the dictionary yields them.

   @param d: Dictionary to operate on
   @return: List of dictionary keys sorted in order by dictionary value.
   """
   # A key function is evaluated once per item, whereas the old cmp-style
   # callback ran on every comparison.
   ordered = sorted(d.items(), key=lambda item: item[1])
   return [ key for key, _ in ordered ]
1053
########################
# removeKeys() function
########################

def removeKeys(d, keys):
   """
   Deletes each listed key from the dictionary, modifying it in-place.

   Keys are removed one at a time in the order given.  Every key must be
   present; removal stops at the first one that is not.

   @param d: Dictionary to operate on
   @param keys: List of keys to remove
   @raise KeyError: If one of the keys does not exist
   """
   for unwanted in keys:
      del d[unwanted]
1070
#########################
# convertSize() function
#########################

def convertSize(size, fromUnit, toUnit):
   """
   Converts a size in one unit to a size in another unit.

   This is a convenience function so the conversion logic lives in exactly
   one place.  The value is first turned into a byte count and the byte
   count is then expressed in the target unit.

   The available units are:

      - C{UNIT_BYTES} - Bytes
      - C{UNIT_KBYTES} - Kilobytes, where 1 kB = 1024 B
      - C{UNIT_MBYTES} - Megabytes, where 1 MB = 1024 kB
      - C{UNIT_GBYTES} - Gigabytes, where 1 GB = 1024 MB
      - C{UNIT_SECTORS} - Sectors, where 1 sector = 2048 B

   @param size: Size to convert
   @type size: Integer or float value in units of C{fromUnit}

   @param fromUnit: Unit to convert from
   @type fromUnit: One of the units listed above

   @param toUnit: Unit to convert to
   @type toUnit: One of the units listed above

   @return: Number converted to new unit, as a float.
   @raise ValueError: If one of the units is invalid, or the size is C{None}.
   """
   if size is None:
      raise ValueError("Cannot convert size of None.")

   # Bytes-per-unit table, scanned in order using equality comparison.
   bytesPerUnit = (
      (UNIT_BYTES, 1),
      (UNIT_KBYTES, BYTES_PER_KBYTE),
      (UNIT_MBYTES, BYTES_PER_MBYTE),
      (UNIT_GBYTES, BYTES_PER_GBYTE),
      (UNIT_SECTORS, BYTES_PER_SECTOR),
   )

   def factorFor(unit):
      for known, factor in bytesPerUnit:
         if unit == known:
            return factor
      return None

   multiplier = factorFor(fromUnit)
   if multiplier is None:
      raise ValueError("Unknown 'from' unit %s." % fromUnit)
   byteSize = float(size) * multiplier

   divisor = factorFor(toUnit)
   if divisor is None:
      raise ValueError("Unknown 'to' unit %s." % toUnit)
   return byteSize / divisor
1130
##########################
# displayBytes() function
##########################

def displayBytes(bytes, digits=2): # pylint: disable=W0622
   """
   Format a byte quantity so it can be sensibly displayed.

   A raw figure such as "72372224 bytes" is hard to take in at a glance,
   whereas "69.02 MB" is not.  Wherever you would otherwise display a byte
   value directly, i.e.::

      print "Size: %s bytes" % bytes

   call this function instead::

      print "Size: %s" % displayBytes(bytes)

   The value is scaled to kB, MB or GB according to its magnitude, and shown
   with the requested number of digits after the decimal point, rounded by
   whatever rules Python's standard C{%f} format specifier applies.  Values
   smaller than 1 kB are shown in bytes with no decimal point, since a
   fractional byte is nonsensical.

   @param bytes: Byte quantity.
   @type bytes: Integer number of bytes.

   @param digits: Number of digits to display after the decimal point.
   @type digits: Integer value, typically 2-5.

   @return: String, formatted for sensible display.
   @raise ValueError: If the byte quantity is C{None}.
   """
   if bytes is None:
      raise ValueError("Cannot display byte value of None.")
   quantity = float(bytes)
   magnitude = math.fabs(quantity)
   if magnitude < BYTES_PER_KBYTE:
      return "%.0f bytes" % quantity
   if magnitude < BYTES_PER_MBYTE:
      suffix = "f kB"
      scaled = quantity / BYTES_PER_KBYTE
   elif magnitude < BYTES_PER_GBYTE:
      suffix = "f MB"
      scaled = quantity / BYTES_PER_MBYTE
   else:
      suffix = "f GB"
      scaled = quantity / BYTES_PER_GBYTE
   precision = "%d" % digits
   return ("%." + precision + suffix) % scaled
##################################
# getFunctionReference() function
##################################

def getFunctionReference(module, function):
   """
   Gets a reference to a named function.

   This does some hokey-pokey to get back a reference to a dynamically named
   function.  For instance, say you wanted to get a reference to the
   C{os.path.isdir} function.  You could use::

      myfunc = getFunctionReference("os.path", "isdir")

   The dotted name is imported by trying progressively shorter prefixes until
   one imports successfully; the remaining components are then resolved with
   C{getattr}.

   Although we won't bomb out directly, behavior is pretty much undefined if
   you pass in C{None} or C{""} for either C{module} or C{function}.

   The only validation we enforce is that whatever we get back must be
   callable.

   I derived this code based on the internals of the Python unittest
   implementation.  I don't claim to completely understand how it works.

   @param module: Name of module associated with function.
   @type module: Something like "os.path" or "CedarBackup2.util"

   @param function: Name of function
   @type function: Something like "isdir" or "getUidGid"

   @return: Reference to function associated with name.

   @raise ImportError: If no prefix of the dotted name can be imported.
   @raise AttributeError: If the module imports but a later name component does not exist.
   @raise ValueError: If the resulting reference is not callable.

   @copyright: Some of this code, prior to customization, was originally part
   of the Python 2.3 codebase.  Python code is copyright (c) 2001, 2002 Python
   Software Foundation; All Rights Reserved.
   """
   parts = []
   if module is not None and module != "":
      parts = module.split(".")
   if function is not None and function != "":
      parts.append(function)
   candidate = parts[:]
   # Keep the imported object in its own variable so that the module *name*
   # passed by the caller is still available for the error message below.
   imported = module
   while candidate:
      try:
         imported = __import__(".".join(candidate))
         break
      except ImportError:
         del candidate[-1]
         if not candidate: raise
   # __import__ returns the top-level package, so walk the remaining names.
   obj = imported
   for part in parts[1:]:
      obj = getattr(obj, part)
   if not callable(obj):
      raise ValueError("Reference to %s.%s is not callable." % (module, function))
   return obj
1241
#######################
# getUidGid() function
#######################

def getUidGid(user, group):
   """
   Get the uid/gid associated with a user/group pair

   When user/group functionality is not available on the platform, no lookup
   is attempted and C{(0, 0)} is returned.

   @param user: User name
   @type user: User name as a string

   @param group: Group name
   @type group: Group name as a string

   @return: Tuple C{(uid, gid)} matching passed-in user and group.
   @raise ValueError: If the ownership user/group values are invalid
   """
   if not _UID_GID_AVAILABLE:
      return (0, 0)
   try:
      userId = pwd.getpwnam(user)[2]
      groupId = grp.getgrnam(group)[2]
      return (userId, groupId)
   except Exception as e:
      # Details go to the debug log; callers get a uniform ValueError.
      logger.debug("Error looking up uid and gid for [%s:%s]: %s", user, group, e)
      raise ValueError("Unable to lookup up uid and gid for passed in user/group.")
1272
#############################
# changeOwnership() function
#############################

def changeOwnership(path, user, group):
   """
   Changes ownership of path to match the user and group.

   Nothing is done if user/group functionality is not available on the
   platform, if either the user or the group is C{None}, or if we are not
   running as root (in which case the change would be unlikely to work
   anyway).  A failure while changing ownership is logged as an error
   rather than raised.

   @param path: Path whose ownership to change.
   @param user: User which owns file.
   @param group: Group which owns file.
   """
   if not _UID_GID_AVAILABLE:
      return
   if user is None or group is None:
      logger.debug("User or group is None, so not attempting to change owner on [%s].", path)
      return
   if not isRunningAsRoot():
      logger.debug("Not root, so not attempting to change owner on [%s].", path)
      return
   try:
      (uid, gid) = getUidGid(user, group)
      os.chown(path, uid, gid)
   except Exception as e:
      logger.error("Error changing ownership of [%s]: %s", path, e)
1301
#############################
# isRunningAsRoot() function
#############################

def isRunningAsRoot():
   """
   Indicates whether the program is running as the root user.
   @return: C{True} if the real user id of the process is zero, C{False} otherwise.
   """
   currentUid = os.getuid()
   return currentUid == 0
1312
##############################
# splitCommandLine() function
##############################

def splitCommandLine(commandLine):
   """
   Splits a command line string into a list of arguments.

   There is no "standard" way to parse a command line string, and doing it
   portably is not easy, since it essentially means emulating the shell's
   argument processing.  This implementation groups arguments with double
   quotes (C{"}) only; single quotes (C{'}) get no special treatment.  Keep
   that in mind when building your command line.

   The parsing method was found while digging around in Google Groups and
   then tweaked for use here.

   @param commandLine: Command line string
   @type commandLine: String, i.e. "cback --verbose stage store"

   @return: List of arguments, suitable for passing to C{popen2}.

   @raise ValueError: If the command line is None.
   """
   if commandLine is None:
      raise ValueError("Cannot split command line of None.")
   arguments = []
   # Each match is either a run of non-space, non-quote characters or a
   # non-empty double-quoted group; the quotes themselves are then dropped.
   for token in re.findall('[^ "]+|"[^"]+"', commandLine):
      arguments.append(token.replace('"', ''))
   return arguments
1344
############################
# resolveCommand() function
############################

def resolveCommand(command):
   """
   Resolves the real path to a command through the path resolver mechanism.

   Both extensions and standard Cedar Backup functionality need a way to
   find the "real" location of various executables.  Normally these are
   assumed to be on the system path, but some callers need to point at an
   alternate location.

   That configuration is handled centrally by the Cedar Backup path resolver
   (a singleton called L{PathResolverSingleton}), which stores the mappings.
   This function wraps access to the singleton, and is what all code
   (extensions or standard functionality) should call to find a command.

   The passed-in command must be a list, in the standard form used by all
   existing Cedar Backup code (something like C{["svnlook", ]}).  Only the
   first element is looked up, and the result is always a new list; the
   caller's list is not modified.

   If no mapping exists for the command, the first element is left as it
   was, so we neatly fall back on default behavior.

   @param command: Command to resolve.
   @type command: List form of command, i.e. C{["svnlook", ]}.

   @return: Path to command or just command itself if no mapping exists.
   """
   resolver = PathResolverSingleton.getInstance()
   executable = command[0]
   resolved = command[:]
   # Passing the name as its own default gives the "unchanged" fallback.
   resolved[0] = resolver.lookup(executable, executable)
   return resolved
1385
############################
# executeCommand() function
############################

def executeCommand(command, args, returnOutput=False, ignoreStderr=False, doNotLog=False, outputFile=None):
   """
   Executes a shell command, hopefully in a safe way.

   This function exists to replace direct calls to C{os.popen} in the Cedar
   Backup code.  It's not safe to call a function such as C{os.popen()} with
   untrusted arguments, since that can cause problems if the string contains
   non-safe variables or other constructs (imagine that the argument is
   C{$WHATEVER}, but C{$WHATEVER} contains something like C{"; rm -fR ~/;
   echo"} in the current environment).

   Instead, it's safer to pass a list of arguments in the style supported by
   C{popen2} or C{popen4}.  This function actually uses a specialized C{Pipe}
   class implemented using either C{subprocess.Popen} or C{popen2.Popen4}.

   Under the normal case, this function will return a tuple of C{(status,
   None)} where the status is the wait-encoded return status of the call per
   the C{popen2.Popen4} documentation.  If C{returnOutput} is passed in as
   C{True}, the function will return a tuple of C{(status, output)} where
   C{output} is a list of strings, one entry per line in the output from the
   command.  Output is always logged to the C{outputLogger.info()} target,
   regardless of whether it's returned.

   If an C{OSError} occurs before the child process could be started, the
   status is C{256} and the output is an empty list (or C{None} when output
   was not requested).  If it occurs afterwards, the status comes from
   waiting on the child, and the output is whatever was collected so far, or
   a one-element list holding the exception if nothing was collected.

   By default, C{stdout} and C{stderr} will be intermingled in the output.
   However, if you pass in C{ignoreStderr=True}, then only C{stdout} will be
   included in the output.

   The C{doNotLog} parameter exists so that callers can force the function to
   not log command output to the debug log.  Normally, you would want to log.
   However, if you're using this function to write huge output files (i.e.
   database backups written to C{stdout}) then you might want to avoid putting
   all that information into the debug log.

   The C{outputFile} parameter exists to make it easier for a caller to push
   output into a file, i.e. as a substitute for redirection to a file.  If this
   value is passed in, each time a line of output is generated, it will be
   written to the file using C{outputFile.write()}.  At the end, the file
   descriptor will be flushed using C{outputFile.flush()}.  The caller
   maintains responsibility for closing the file object appropriately.

   @note: I know that it's a bit confusing that the command and the arguments
   are both lists.  I could have just required the caller to pass in one big
   list.  However, I think it makes some sense to keep the command (the
   constant part of what we're executing, i.e. C{"scp -B"}) separate from its
   arguments, even if they both end up looking kind of similar.

   @note: You cannot redirect output via shell constructs (i.e. C{>file},
   C{2>/dev/null}, etc.) using this function.  The redirection string would be
   passed to the command just like any other argument.  However, you can
   implement the equivalent to redirection using C{ignoreStderr} and
   C{outputFile}, as discussed above.

   @note: The operating system environment is partially sanitized before
   the command is invoked.  See L{sanitizeEnvironment} for details.

   @param command: Shell command to execute
   @type command: List of individual arguments that make up the command

   @param args: List of arguments to the command
   @type args: List of additional arguments to the command

   @param returnOutput: Indicates whether to return the output of the command
   @type returnOutput: Boolean C{True} or C{False}

   @param ignoreStderr: Whether stderr should be discarded
   @type ignoreStderr: Boolean True or False

   @param doNotLog: Indicates that output should not be logged.
   @type doNotLog: Boolean C{True} or C{False}

   @param outputFile: File object that all output should be written to.
   @type outputFile: File object as returned from C{open()} or C{file()}.

   @return: Tuple of C{(result, output)} as described above.
   """
   logger.debug("Executing command %s with args %s.", command, args)
   outputLogger.info("Executing command %s with args %s.", command, args)
   if doNotLog:
      logger.debug("Note: output will not be logged, per the doNotLog flag.")
      outputLogger.info("Note: output will not be logged, per the doNotLog flag.")
   output = []
   fields = command[:]        # make sure to copy it so we don't destroy it
   fields.extend(args)
   pipe = None                # stays None until a child process actually exists
   try:
      sanitizeEnvironment()   # make sure we have a consistent environment
      try:
         pipe = Pipe(fields, ignoreStderr=ignoreStderr)
      except OSError:
         # On some platforms (i.e. Cygwin) this intermittently fails the first time we do it.
         # So, we attempt it a second time and if that works, we just go on as usual.
         # The problem appears to be that we sometimes get a bad stderr file descriptor.
         pipe = Pipe(fields, ignoreStderr=ignoreStderr)
      while True:
         line = pipe.stdout.readline()
         if not line: break
         if returnOutput: output.append(line)
         if outputFile is not None: outputFile.write(line)
         if not doNotLog:
            # Log as we go so the log will (hopefully) get updated in realtime.
            # Only strip a real trailing newline: the final line of output may
            # not have one, and blindly cutting a character would lose data.
            outputLogger.info(line[:-1] if line[-1:] == "\n" else line)
      if outputFile is not None:
         try:  # note, not every file-like object can be flushed
            outputFile.flush()
         except Exception:  # deliberate best-effort, but let KeyboardInterrupt/SystemExit through
            pass
      if returnOutput:
         return (pipe.wait(), output)
      else:
         return (pipe.wait(), None)
   except OSError as e:
      if pipe is None:
         # We never got as far as starting the child, so there is nothing to wait on.
         return (256, [] if returnOutput else None)
      if not returnOutput:
         return (pipe.wait(), None)
      if output != []:
         return (pipe.wait(), output)
      return (pipe.wait(), [ e, ])
1511
##############################
# calculateFileAge() function
##############################

def calculateFileAge(path):
   """
   Calculates the age (in days) of a file.

   The "age" of a file is the time elapsed since it was last used, where
   "last used" is the more recent of the file's C{st_atime} and C{st_mtime}
   values.  The current time is truncated to whole seconds before the
   difference is taken.

   This is only intended to work with files, but it will probably work with
   anything on the filesystem.

   @param path: Path to a file on disk.

   @return: Age of the file in days (possibly fractional).
   @raise OSError: If the file doesn't exist.
   """
   now = int(time.time())
   stats = os.stat(path)
   # The larger timestamp is the more recent one.
   if stats.st_mtime > stats.st_atime:
      lastUse = stats.st_mtime
   else:
      lastUse = stats.st_atime
   return (now - lastUse) / SECONDS_PER_DAY
1538
###################
# mount() function
###################

def mount(devicePath, mountPoint, fsType):
   """
   Mounts the indicated device at the indicated mount point.

   For instance, to mount a CD, you might use device path C{/dev/cdrw}, mount
   point C{/media/cdrw} and filesystem type C{iso9660}.  Any filesystem type
   supported by C{mount} on your platform can be used.  If the type is
   C{None}, no C{-t} option is passed and C{mount} is left to auto-detect
   the type.  This may or may not work on all systems.

   @note: This only works on platforms that have a concept of "mounting" a
   filesystem through a command-line C{"mount"} command, like UNIXes.  It
   won't work on Windows.

   @param devicePath: Path of device to be mounted.
   @param mountPoint: Path that device should be mounted at.
   @param fsType: Type of the filesystem assumed to be available via the device.

   @raise IOError: If the device cannot be mounted.
   """
   args = [ devicePath, mountPoint ]
   if fsType is not None:
      args = [ "-t", fsType ] + args
   command = resolveCommand(MOUNT_COMMAND)
   outcome = executeCommand(command, args, returnOutput=False, ignoreStderr=True)
   result = outcome[0]
   if result == 0:
      return
   raise IOError("Error [%d] mounting [%s] at [%s] as [%s]." % (result, devicePath, mountPoint, fsType))
1572
#####################
# unmount() function
#####################

def unmount(mountPoint, removeAfter=False, attempts=1, waitSeconds=0):
   """
   Unmounts whatever device is mounted at the indicated mount point.

   Sometimes, it might not be possible to unmount the mount point immediately,
   if there are still files open there.  Use the C{attempts} and C{waitSeconds}
   arguments to indicate how many unmount attempts to make and how many seconds
   to wait between attempts.  If you pass in zero attempts, no attempts will be
   made (duh); if the mount point is then still mounted, C{IOError} is raised
   just as it is when all attempts have failed.

   If the indicated mount point is not really a mount point per
   C{os.path.ismount()}, then it will be ignored.  This seems to be a safer
   check than looking through C{/etc/mtab}, since C{ismount()} is already in
   the Python standard library and is documented as working on all POSIX
   systems.

   If C{removeAfter} is C{True}, then the mount point will be removed using
   C{os.rmdir()} after the unmount action succeeds.  If for some reason the
   mount point is not a directory, then it will not be removed.

   @note: This only works on platforms that have a concept of "mounting" a
   filesystem through a command-line C{"mount"} command, like UNIXes.  It
   won't work on Windows.

   @note: Attempt numbers written to the log are zero-based.

   @param mountPoint: Mount point to be unmounted.
   @param removeAfter: Remove the mount point after unmounting it.
   @param attempts: Number of times to attempt the unmount.
   @param waitSeconds: Number of seconds to wait between repeated attempts.

   @raise IOError: If the mount point is still mounted after attempts are exhausted.
   """
   # NOTE(review): the block nesting below was reconstructed from a flattened
   # listing.  Confirm that the for/else pairing and the placement of the
   # removal step inside the ismount() guard match the real file.
   if os.path.ismount(mountPoint):
      for attempt in range(0, attempts):
         logger.debug("Making attempt %d to unmount [%s].", attempt, mountPoint)
         command = resolveCommand(UMOUNT_COMMAND)
         result = executeCommand(command, [ mountPoint, ], returnOutput=False, ignoreStderr=True)[0]
         if result != 0:
            logger.error("Error [%d] unmounting [%s] on attempt %d.", result, mountPoint, attempt)
         elif os.path.ismount(mountPoint):
            # The command reported success, but the mount is still there
            logger.error("After attempt %d, [%s] is still mounted.", attempt, mountPoint)
         else:
            logger.debug("Successfully unmounted [%s] on attempt %d.", mountPoint, attempt)
            break  # this will cause us to skip the loop else: clause
         if attempt+1 < attempts:  # i.e. this isn't the last attempt
            if waitSeconds > 0:
               logger.info("Sleeping %d second(s) before next unmount attempt.", waitSeconds)
               time.sleep(waitSeconds)
      else:
         # Only reached when the loop ran out without a break, i.e. no attempt
         # was seen to succeed (or no attempts were made at all)
         if os.path.ismount(mountPoint):
            raise IOError("Unable to unmount [%s] after %d attempts." % (mountPoint, attempts))
         logger.info("Mount point [%s] seems to have finally gone away.", mountPoint)
      if os.path.isdir(mountPoint) and removeAfter:
         logger.debug("Removing mount point [%s].", mountPoint)
         os.rmdir(mountPoint)
1632
###########################
# deviceMounted() function
###########################

def deviceMounted(devicePath):
   """
   Indicates whether a specific filesystem device is currently mounted.

   We determine whether the device is mounted by looking through the system's
   C{mtab} file.  This file shows every currently-mounted filesystem, ordered
   by device.  We only do the check if the C{mtab} file exists and is readable.
   Otherwise, we assume that the device is not mounted.

   Both the path as given and its C{os.path.realpath()} form are compared
   against the device column, so a symlinked device path is still matched.
   Blank or malformed lines in the file are skipped.

   @note: This only works on platforms that have a concept of an mtab file
   to show mounted volumes, like UNIXes.  It won't work on Windows.

   @param devicePath: Path of device to be checked

   @return: True if device is mounted, false otherwise.
   """
   if not (os.path.exists(MTAB_FILE) and os.access(MTAB_FILE, os.R_OK)):
      return False
   realPath = os.path.realpath(devicePath)
   with open(MTAB_FILE) as mtab:  # close the handle promptly instead of leaving it to the garbage collector
      lines = mtab.readlines()
   for line in lines:
      fields = line.split(None, 2)
      if len(fields) < 2:
         continue  # blank or truncated line: nothing to compare against
      mountDevice = fields[0]
      mountPoint = fields[1]
      if mountDevice in [ devicePath, realPath, ]:
         logger.debug("Device [%s] is mounted at [%s].", devicePath, mountPoint)
         return True
   return False
1663
########################
# encodePath() function
########################

def encodePath(path):

   r"""
   Safely encodes a filesystem path.

   Many Python filesystem functions, such as C{os.listdir}, behave differently
   depending on whether they are handed unicode arguments or simple string
   arguments.  For instance, C{os.listdir} generally returns unicode path
   names when given a unicode argument, and string pathnames when given a
   string argument.

   Unfortunately, that behavior is not as consistent as one would like.  For
   example, C{os.listdir} "gives up" on a filename that it can't properly
   encode under the current locale settings, so the list it returns can be a
   mixture of unicode and simple string paths.  That bites later on, because
   other filesystem functions like C{os.path.join} will blow up if they are
   given one string path and one unicode path.

   On comp.lang.python, Martin v. Löwis explained the C{os.listdir} behavior
   like this::

      The operating system (POSIX) does not have the inherent notion that file
      names are character strings.  Instead, in POSIX, file names are primarily
      byte strings.  There are some bytes which are interpreted as characters
      (e.g. '\x2e', which is '.', or '\x2f', which is '/'), but apart from
      that, most OS layers think these are just bytes.

      Now, most *people* think that file names are character strings.  To
      interpret a file name as a character string, you need to know what the
      encoding is to interpret the file names (which are byte strings) as
      character strings.

      There is, unfortunately, no operating system API to carry the notion of a
      file system encoding.  By convention, the locale settings should be used
      to establish this encoding, in particular the LC_CTYPE facet of the
      locale.  This is defined in the environment variables LC_CTYPE, LC_ALL,
      and LANG (searched in this order).

      If LANG is not set, the "C" locale is assumed, which uses ASCII as its
      file system encoding.  In this locale, '\xe2\x99\xaa\xe2\x99\xac' is not a
      valid file name (at least it cannot be interpreted as characters, and
      hence not be converted to Unicode).

      Now, your Python script has requested that all file names *should* be
      returned as character (ie. Unicode) strings, but Python cannot comply,
      since there is no way to find out what this byte string means, in terms
      of characters.

      So we have three options:

      1. Skip this string, only return the ones that can be converted to Unicode.
         Give the user the impression the file does not exist.
      2. Return the string as a byte string
      3. Refuse to listdir altogether, raising an exception (i.e. return nothing)

      Python has chosen alternative 2, allowing the application to implement 1
      or 3 on top of that if it wants to (or come up with other strategies,
      such as user feedback).

   His suggested solution is to sensibly encode a path before handing it to
   the filesystem functions, rather than passing unicode paths in directly.
   That is what this function does: a unicode path is encoded using the
   filesystem encoding (or the default encoding, if no filesystem encoding is
   reported), and any other value is handed back untouched.  Any function
   which takes a filesystem path as an argument should encode it first,
   before using it for any other purpose.

   I confess I still don't completely understand how this works.  On a system
   with filesystem encoding "ISO-8859-1", a path C{u"\xe2\x99\xaa\xe2\x99\xac"}
   is converted into the string C{"\xe2\x99\xaa\xe2\x99\xac"}.  However, on a
   system with a "utf-8" encoding, the result is a completely different string:
   C{"\xc3\xa2\xc2\x99\xc2\xaa\xc3\xa2\xc2\x99\xc2\xac"}.  A quick test where I
   write to the first filename and open the second proves that the two strings
   represent the same file on disk, which is all I really care about.

   @note: As a special case, if C{path} is C{None}, then this function will
   return C{None}.

   @note: To provide several examples of encoding values, my Debian sarge box
   with an ext3 filesystem has Python filesystem encoding C{ISO-8859-1}.  User
   Anarcat's Debian box with a xfs filesystem has filesystem encoding
   C{ANSI_X3.4-1968}.  Both my iBook G4 running Mac OS X 10.4 and user Dag
   Rende's SuSE 9.3 box both have filesystem encoding C{UTF-8}.

   @note: Just because a filesystem has C{UTF-8} encoding doesn't mean that it
   will be able to handle all extended-character filenames.  For instance,
   certain extended-character (but not UTF-8) filenames -- like the ones in the
   regression test tar file C{test/data/tree13.tar.gz} -- are not valid under
   Mac OS X, and it's not even possible to extract them from the tarfile on
   that platform.

   @param path: Path to encode

   @return: Path, as a string, encoded appropriately
   @raise ValueError: If the path cannot be encoded properly.
   """
   if path is None:
      return path
   try:
      if not isinstance(path, unicode):
         return path  # already a simple string (or something else); leave it alone
      encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
      return path.encode(encoding)
   except UnicodeError:
      raise ValueError("Path could not be safely encoded as %s." % encoding)
1770
########################
# nullDevice() function
########################

def nullDevice():
   """
   Returns the path of the null device on this system, in a portable way.

   The value comes straight from C{os.devnull}.  On a UNIX system this is
   something like C{/dev/null}; other platforms use a different name.

   @return: Path to the null device, as reported by C{os.devnull}.
   """
   devnull = os.devnull
   return devnull
1784
##############################
# deriveDayOfWeek() function
##############################

def deriveDayOfWeek(dayName):
   """
   Converts an English day name to the numeric day of week used by C{time.localtime}.

   The comparison is case-insensitive.  For instance, the day C{monday} is
   converted to the number C{0}.

   @param dayName: Day of week to convert
   @type dayName: string, i.e. C{"monday"}, C{"tuesday"}, etc.

   @returns: Integer, where Monday is 0 and Sunday is 6; or -1 if no conversion is possible.
   """
   # Position in this tuple is the numeric day of week.
   weekdays = ("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
   lowered = dayName.lower()
   if lowered in weekdays:
      return weekdays.index(lowered)
   # Unrecognized names are reported as -1 rather than by raising an exception.
   return -1
1817
###########################
# isStartOfWeek() function
###########################

def isStartOfWeek(startingDay):
   """
   Indicates whether "today" is the backup starting day per configuration.

   Today is a starting day when the local weekday number from
   C{time.localtime} equals the number that L{deriveDayOfWeek} derives from
   the configured day name.  The outcome is also logged at debug level.

   @param startingDay: Configured starting day.
   @type startingDay: string, i.e. C{"monday"}, C{"tuesday"}, etc.

   @return: Boolean indicating whether today is the starting day.
   """
   today = time.localtime().tm_wday
   if today == deriveDayOfWeek(startingDay):
      logger.debug("Today is the start of the week.")
      return True
   logger.debug("Today is NOT the start of the week.")
   return False
1841
#################################
# buildNormalizedPath() function
#################################

def buildNormalizedPath(path):
   """
   Returns a "normalized" path based on a path name.

   A normalized path is a representation of a path that is also a valid file
   name.  To make a valid file name out of a complete path, we have to convert
   or remove some characters that are significant to the filesystem -- in
   particular, the path separator and any leading C{'.'} character (which would
   cause the file to be hidden in a file listing).

   Note that this is a one-way transformation -- you can't safely derive the
   original path from the normalized path.

   Normalization proceeds in order: one leading C{'/'} is removed, then one
   leading C{'\\\\'} is removed, then a leading C{'.'} is converted to C{'_'}.
   After that, all remaining C{'/'} or C{'\\\\'} characters are converted to
   C{'-'}, and all whitespace characters are converted to C{'_'}.

   As special cases, an empty path is returned unchanged, and a path consisting
   only of a single C{'/'} or C{'\\\\'} character is converted to C{'-'}.

   @param path: Path to normalize

   @return: Normalized path as described above.

   @raise ValueError: If the path is None
   """
   if path is None:
      raise ValueError("Cannot normalize path None.")
   if len(path) == 0:
      return path
   if path in ("/", "\\"):
      return "-"
   result = path
   if result.startswith("/"):       # remove leading '/'
      result = result[1:]
   if result.startswith("\\"):      # remove leading '\'
      result = result[1:]
   if result.startswith("."):       # convert leading '.' to '_' so file won't be hidden
      result = "_" + result[1:]
   # convert all remaining '/' and '\' characters to '-'
   result = result.replace("/", "-").replace("\\", "-")
   # convert all whitespace to '_'
   return re.sub(r"\s", "_", result)
1890
#################################
# sanitizeEnvironment() function
#################################

def sanitizeEnvironment():
   """
   Sanitizes the operating system environment.

   The operating system environment is contained in C{os.environ}.  This method
   sanitizes the contents of that dictionary.

   Currently, all it does is reset the locale (removing every variable listed
   in L{LOCALE_VARS}) and reset the default language (C{$LANG}) to
   L{DEFAULT_LANGUAGE}.  This way, we can count on consistent localization
   regardless of what the end-user has configured.  This is important for code
   that needs to parse program output.

   The C{os.environ} dictionary is modified in-place.  C{$LANG} is only
   assigned when it is present with a different value: if it is already set
   to the proper value it is not re-set, so we can avoid the memory leaks that
   are documented to occur on BSD-based systems, and if it is not set at all
   it is left unset.

   @return: Copy of the sanitized environment.
   """
   # Use the "in" operator rather than the deprecated has_key() method.
   for var in LOCALE_VARS:
      if var in os.environ:
         del os.environ[var]
   # no need to reset if it already has the right value (avoid leaks on BSD systems)
   if LANG_VAR in os.environ and os.environ[LANG_VAR] != DEFAULT_LANGUAGE:
      os.environ[LANG_VAR] = DEFAULT_LANGUAGE
   return os.environ.copy()
1921 1940
#########################
# checkUnique() function
#########################

def checkUnique(prefix, values):
   """
   Checks that all values are unique.

   The values list is checked for duplicate values.  If there are
   duplicates, an exception is thrown.  All duplicate values are listed in
   the exception, in sorted order; a value that occurs C{n} times is listed
   C{n-1} times.

   The check works on a sorted copy, so the caller's C{values} sequence is
   never reordered or otherwise modified.

   @param prefix: Prefix to use in the thrown exception
   @param values: List of values to check

   @raise ValueError: If there are duplicates in the list
   """
   # sorted() copies, so the caller's list keeps its original order.
   ordered = sorted(values)
   # After sorting, equal values are adjacent; compare each neighbouring pair.
   duplicates = [current for previous, current in zip(ordered, ordered[1:]) if previous == current]
   if duplicates:
      raise ValueError("%s %s" % (prefix, duplicates))
1966
#######################################
# parseCommaSeparatedString() function
#######################################

def parseCommaSeparatedString(commaString):
   """
   Parses a list of values out of a comma-separated string.

   The string is split on commas and each piece has surrounding whitespace
   stripped.  Pieces that are empty after stripping are dropped.  As a special
   case, if C{commaString} is C{None}, then C{None} will be returned.

   @param commaString: List of values in comma-separated string format.
   @return: Values from commaString split into a list, or C{None}.
   """
   if commaString is None:
      return None
   stripped = (piece.strip() for piece in commaString.split(","))
   return [piece for piece in stripped if len(piece) > 0]
1993