Package CedarBackup2 :: Package tools :: Module amazons3
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.tools.amazons3

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2014 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 2 (>= 2.7) 
  29  # Project  : Cedar Backup, release 2 
  30  # Purpose  : Cedar Backup tool to synchronize an Amazon S3 bucket. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Notes 
  36  ######################################################################## 
  37   
  38  """ 
  39  Synchronizes a local directory with an Amazon S3 bucket.
  40   
  41  No configuration is required; all necessary information is taken from the 
  42  command-line.  The only thing configuration would help with is the path 
  43  resolver interface, and it doesn't seem worth it to require configuration just 
  44  to get that. 
  45   
  46  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  47  """ 
  48   
  49  ######################################################################## 
  50  # Imported modules and constants 
  51  ######################################################################## 
  52   
  53  # System modules 
  54  import sys 
  55  import os 
  56  import logging 
  57  import getopt 
  58  import json 
  59  import warnings 
  60  import chardet 
  61   
  62  # Cedar Backup modules 
  63  from CedarBackup2.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT 
  64  from CedarBackup2.filesystem import FilesystemList 
  65  from CedarBackup2.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE 
  66  from CedarBackup2.util import Diagnostics, splitCommandLine, encodePath 
  67  from CedarBackup2.util import executeCommand 
  68   
  69   
  70  ######################################################################## 
  71  # Module-wide constants and variables 
  72  ######################################################################## 
  73   
  74  logger = logging.getLogger("CedarBackup2.log.tools.amazons3") 
  75   
  76  AWS_COMMAND   = [ "aws" ] 
  77   
  78  SHORT_SWITCHES     = "hVbql:o:m:OdsDvw" 
  79  LONG_SWITCHES      = [ 'help', 'version', 'verbose', 'quiet', 
  80                         'logfile=', 'owner=', 'mode=', 
  81                         'output', 'debug', 'stack', 'diagnostics', 
  82                         'verifyOnly', 'ignoreWarnings', ] 
  83   
  84   
  85  ####################################################################### 
  86  # Options class 
  87  ####################################################################### 
  88   
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two main levels of validation in the C{Options} class.  The first
   is field-level validation, which happens whenever a field is assigned to.
   Python's C{property} functionality is used to enforce specific validations
   on field values.  You should expect to catch a C{ValueError} exception when
   making assignments to fields if you are programmatically filling an object.

   The second level of validation is post-completion validation.  Certain
   validations don't make sense until an object representation of options is
   fully "complete", so they are encapsulated in the L{Options.validate}
   method.  This method can be called at any time by a client.  It is called
   after creating a C{Options} object from a command line, and before
   exporting a C{Options} object back to a command line, unless the caller
   passes C{validate=False}.

   @note: Comparison via L{__cmp__} walks the fields in a fixed order and
   returns at the first field that differs.

   @sort: __init__, __repr__, __str__, __cmp__
   """

   # Field names in the order that __cmp__ compares them.
   _COMPARE_ORDER = ( "help", "version", "verbose", "quiet", "logfile", "owner",
                      "mode", "output", "debug", "stacktrace", "diagnostics",
                      "verifyOnly", "ignoreWarnings", "sourceDir", "s3BucketUrl", )

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty, nothing is parsed, and
      L{validate} is not called.

      No reference to the original arguments is saved off by this class.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.

      The argument string is converted into an argument list by
      C{splitCommandLine} and then handled exactly like C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called after successfully parsing any passed-in command
      line.  Even if C{validate} is C{False}, parsing itself can still raise
      an exception.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

      @param argumentString: Command line for a program.
      @type argumentString: String, i.e. "--verbose /home/myuser s3://bucket"

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid, or if both
      C{argumentList} and C{argumentString} are passed.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         if validate:
            self.validate()

   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.
      This is the (unvalidated) command-line string built from the fields.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   #############################
   # Standard comparison method
   #############################

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.

      Fields are compared one at a time, in the order given by
      C{_COMPARE_ORDER}; the first field that differs decides the result.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      for field in self._COMPARE_ORDER:
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            if mine < theirs:
               return -1
            else:
               return 1
      return 0

   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      The value is passed through C{encodePath} before being stored.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The logfile parameter must be a non-empty string.")
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
      else:
         if isinstance(value, str):
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value) != 2:
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value[0]) < 1 or len(value[1]) < 1:
            raise ValueError("User and group tuple values must be non-empty strings.")
         self._owner = (value[0], value[1])

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.

      A string value is interpreted as octal digits (i.e. C{"644"}); any other
      value is converted with C{int()}.  The stored mode is left untouched if
      the new value is rejected.

      @raise ValueError: If the value cannot be converted or is negative.
      """
      if value is None:
         self._mode = None
      else:
         try:
            if isinstance(value, str):
               value = int(value, 8)
            else:
               value = int(value)
         except (TypeError, ValueError):
            # int() raises ValueError (not TypeError) for a non-octal string,
            # so both are mapped onto the documented error message.
            raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
         if value < 0:
            raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
         self._mode = value

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")

   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless one of C{--help}, C{--version} or C{--diagnostics} is set, both
      the source directory and the S3 bucket URL must have been provided.
      Validations on the values of individual options are taken care of at
      assignment time by the properties functionality.

      @raise ValueError: If one of the validations fails.
      """
      if not self.help and not self.version and not self.diagnostics:
         if self.sourceDir is None or self.s3BucketUrl is None:
            raise ValueError("Source directory and S3 bucket URL are both required.")

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  The argument list is normalized to use the long option
      names (i.e. --version rather than -V).  The resulting list is suitable
      for passing back to the constructor in the C{argumentList} parameter.
      Unlike L{buildArgumentString}, string arguments are not quoted here.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = []
      if self.help:
         argumentList.append("--help")
      if self.version:
         argumentList.append("--version")
      if self.verbose:
         argumentList.append("--verbose")
      if self.quiet:
         argumentList.append("--quiet")
      if self.logfile is not None:
         argumentList.extend([ "--logfile", self.logfile, ])
      if self.owner is not None:
         argumentList.extend([ "--owner", "%s:%s" % (self.owner[0], self.owner[1]), ])
      if self.mode is not None:
         argumentList.extend([ "--mode", "%o" % self.mode, ])
      if self.output:
         argumentList.append("--output")
      if self.debug:
         argumentList.append("--debug")
      if self.stacktrace:
         argumentList.append("--stack")
      if self.diagnostics:
         argumentList.append("--diagnostics")
      if self.verifyOnly:
         argumentList.append("--verifyOnly")
      if self.ignoreWarnings:
         argumentList.append("--ignoreWarnings")
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  The argument string is normalized to use the long
      option names (i.e. --version rather than -V) and wraps string arguments
      in double quotes (C{"}).  Every emitted item, including the last one, is
      followed by a single space.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = []
      if self.help:
         pieces.append("--help")
      if self.version:
         pieces.append("--version")
      if self.verbose:
         pieces.append("--verbose")
      if self.quiet:
         pieces.append("--quiet")
      if self.logfile is not None:
         pieces.append("--logfile \"%s\"" % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\"" % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o" % self.mode)
      if self.output:
         pieces.append("--output")
      if self.debug:
         pieces.append("--debug")
      if self.stacktrace:
         pieces.append("--stack")
      if self.diagnostics:
         pieces.append("--diagnostics")
      if self.verifyOnly:
         pieces.append("--verifyOnly")
      if self.ignoreWarnings:
         pieces.append("--ignoreWarnings")
      if self.sourceDir is not None:
         pieces.append("\"%s\"" % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\"" % self.s3BucketUrl)
      return "".join([ piece + " " for piece in pieces ])

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation done here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  Whether
      required elements exist is the job of the L{validate} method.

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      The positional arguments are only used when there are exactly two of
      them; any other count leaves C{sourceDir} and C{s3BucketUrl} unset.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If the argument list cannot be parsed.
      @raise ValueError: If the value given for a switch is invalid.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = dict(opts)  # the last occurrence of a repeated switch wins
      if "-h" in switches or "--help" in switches:
         self.help = True
      if "-V" in switches or "--version" in switches:
         self.version = True
      if "-b" in switches or "--verbose" in switches:
         self.verbose = True
      if "-q" in switches or "--quiet" in switches:
         self.quiet = True
      # For valued switches the long form is assigned last, so it takes precedence.
      if "-l" in switches:
         self.logfile = switches["-l"]
      if "--logfile" in switches:
         self.logfile = switches["--logfile"]
      if "-o" in switches:
         self.owner = switches["-o"].split(":", 1)
      if "--owner" in switches:
         self.owner = switches["--owner"].split(":", 1)
      if "-m" in switches:
         self.mode = switches["-m"]
      if "--mode" in switches:
         self.mode = switches["--mode"]
      if "-O" in switches or "--output" in switches:
         self.output = True
      if "-d" in switches or "--debug" in switches:
         self.debug = True
      if "-s" in switches or "--stack" in switches:
         self.stacktrace = True
      if "-D" in switches or "--diagnostics" in switches:
         self.diagnostics = True
      if "-v" in switches or "--verifyOnly" in switches:
         self.verifyOnly = True
      if "-w" in switches or "--ignoreWarnings" in switches:
         self.ignoreWarnings = True
      try:
         (self.sourceDir, self.s3BucketUrl) = remaining
      except ValueError:
         # Deliberately ignored: a wrong number of positional arguments (or an
         # empty one) is reported later by validate().
         pass
807 808 809 ####################################################################### 810 # Public functions 811 ####################################################################### 812 813 ################# 814 # cli() function 815 ################# 816
def cli():
   """
   Implements the command-line interface for the C{cback-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback-amazons3-sync script.
   It does all of the argument processing for the script, and then also
   implements the tool functionality.

   This function looks pretty similar to C{CedarBackup2.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   A different error code is returned for each type of failure:

      - C{1}: The Python interpreter is not Python 2, version >= 2.7
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   When the C{--stack} option is given, exceptions from logging setup and
   from the action itself are not caught, so the interpreter prints a stack
   trace instead of returning codes 3, 5 or 6.

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above (C{0} on success).
   """
   # Explicit major/minor test rather than a list comparison wrapped in a
   # bare "except:".  The result is the same (2.7+ on the 2.x line passes,
   # anything else is rejected), but unrelated exceptions are no longer
   # swallowed.
   if sys.version_info[0] != 2 or sys.version_info[1] < 7:
      sys.stderr.write("Python 2 version 2.7 or greater required.\n")
      return 1

   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception as e:
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   # Informational switches short-circuit everything else.
   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   if options.stacktrace:
      logfile = setupLogging(options)
   else:
      try:
         logfile = setupLogging(options)
      except Exception as e:
         sys.stderr.write("Error setting up logging: %s\n" % e)
         return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]", options)
   logger.info("Logfile is [%s]", logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception as e:
         logger.error("Error executing backup: %s", e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0
899 900 901 ####################################################################### 902 # Utility functions 903 ####################################################################### 904 905 #################### 906 # _usage() function 907 #################### 908
def _usage(fd=sys.stderr):
   """
   Prints usage information for the cback-amazons3-sync script.

   Each line of the usage text is written with its own C{fd.write()} call.

   @param fd: File-like object (anything with a C{write} method) to print to.
   @note: The C{fd} is used rather than C{print} to facilitate unit testing.
   """
   # NOTE(review): the original source carries a comment on the "--stack" line
   # saying it is exactly 80 characters wide -- confirm the column spacing of
   # these literals against the real file before relying on it.
   usageLines = (
      "\n",
      " Usage: cback-amazons3-sync [switches] sourceDir s3bucketUrl\n",
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      "\n",
      " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
      " bucket. After the sync is complete, a validation step is taken. An\n",
      " error is reported if the contents of the bucket do not match the\n",
      " source directory, or if the indicated size for any file differs.\n",
      " This tool is a wrapper over the AWS CLI command-line tool.\n",
      "\n",
      " The following arguments are required:\n",
      "\n",
      " sourceDir The local source directory on disk (must exist)\n",
      " s3BucketUrl The URL to the target Amazon S3 bucket\n",
      "\n",
      " The following switches are accepted:\n",
      "\n",
      " -h, --help Display this usage/help listing\n",
      " -V, --version Display version information\n",
      " -b, --verbose Print verbose output as well as logging to disk\n",
      " -q, --quiet Run quietly (display no output to the screen)\n",
      " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
      " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]),
      " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
      " -O, --output Record some sub-command (i.e. aws) output to the log\n",
      " -d, --debug Write debugging information to the log (implies --output)\n",
      " -s, --stack Dump Python stack trace instead of swallowing exceptions\n",
      " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
      " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
      " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
      "\n",
      " Typical usage would be something like:\n",
      "\n",
      " cback-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
      "\n",
      " This will sync the contents of /home/myuser into the indicated bucket.\n",
      "\n",
   )
   for usageLine in usageLines:
      fd.write(usageLine)
953 954 955 ###################### 956 # _version() function 957 ###################### 958
def _version(fd=sys.stdout):
   """
   Writes the version and copyright banner for the Amazon S3 sync tool.

   The banner includes the Cedar Backup version and release date, the
   copyright holder, and a pointer to the C{--help} option.

   @param fd: File-like object that receives the banner via C{write()}.
   @note: Output goes through C{fd} rather than C{print} so that unit tests can capture it.
   """
   banner = [
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
      "\n",
      " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
      " See CREDITS for a list of included code and other contributors.\n",
      " This is free software; there is NO warranty. See the\n",
      " GNU General Public License version 2 for copying conditions.\n",
      "\n",
      " Use the --help option for usage information.\n",
      "\n",
   ]
   for text in banner:
      fd.write(text)
976 977 978 ########################## 979 # _diagnostics() function 980 ########################## 981
def _diagnostics(fd=sys.stdout):
   """
   Writes a runtime diagnostics report.

   A short heading is written first, then a C{Diagnostics} object prints its
   own report to the same destination, followed by a trailing blank line.

   @param fd: File-like object that receives the report via C{write()}.
   @note: Output goes through C{fd} rather than C{print} so that unit tests can capture it.
   """
   for heading in ("\n", "Diagnostics:\n", "\n"):
      fd.write(heading)
   report = Diagnostics()
   report.printDiagnostics(fd=fd, prefix=" ")
   fd.write("\n")
993 994 995 ############################ 996 # _executeAction() function 997 ############################ 998
def _executeAction(options):
   """
   Runs the cback-amazons3-sync workflow for a set of parsed options.

   The steps, in order, are:

      1. Build the list of files under C{options.sourceDir}.
      2. Check filename encodings, unless C{options.ignoreWarnings} is set.
      3. Synchronize the directory to the bucket, unless C{options.verifyOnly} is set.
      4. Verify the bucket contents against the source file list (always).

   @param options: Program command-line options.
   @type options: Options object.

   @raise Exception: Under many generic error conditions
   """
   sourceDir = options.sourceDir
   sourceFiles = _buildSourceFiles(sourceDir)

   checkEncodings = not options.ignoreWarnings
   if checkEncodings:
      _checkSourceFiles(sourceDir, sourceFiles)

   uploadFirst = not options.verifyOnly
   if uploadFirst:
      _synchronizeBucket(sourceDir, options.s3BucketUrl)

   _verifyBucketContents(sourceDir, sourceFiles, options.s3BucketUrl)
1014 1015 1016 ################################ 1017 # _buildSourceFiles() function 1018 ################################ 1019
def _buildSourceFiles(sourceDir):
   """
   Collects the contents of a source directory into a filesystem list.
   @param sourceDir: Local source directory
   @return: FilesystemList populated via C{addDirContents(sourceDir)}
   @raise ValueError: If C{sourceDir} is not an existing directory
   """
   if os.path.isdir(sourceDir):
      contents = FilesystemList()
      contents.addDirContents(sourceDir)
      return contents
   raise ValueError("Source directory does not exist on disk.")
1031 1032 1033 ############################### 1034 # _checkSourceFiles() function 1035 ############################### 1036
def _checkSourceFiles(sourceDir, sourceFiles):
   """
   Check source files, trying to guess which ones will have encoding problems.

   Each entry is passed to C{chardet.detect()}, decoded using the detected
   encoding, and re-encoded using the encoding reported by
   C{Diagnostics().encoding}.  An entry is flagged as a problem when any of
   the following is true:

      - no encoding could be detected for it
      - it cannot be decoded using the detected encoding
      - it cannot be encoded using the required encoding
      - the decoded and re-encoded values do not compare equal

   Every problem entry is logged, so a single run reports all of them before
   the error is raised.

   @param sourceDir: Local source directory (not used by the check itself)
   @param sourceFiles: Iterable of source file paths to check
   @raise ValueError: If a problem file is found
   @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
   @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
   @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
   """
   with warnings.catch_warnings():
      warnings.simplefilter("ignore") # So we don't print unicode warnings from comparisons

      encoding = Diagnostics().encoding

      failed = False
      for entry in sourceFiles:
         detected = chardet.detect(entry)["encoding"]
         consistent = True
         try:
            if detected is None:
               # Nothing was detected, so there is no codec to decode with
               consistent = False
            else:
               source = entry.decode(detected)
               target = source.encode(encoding)
               if source != target:
                  consistent = False
         except (UnicodeError, LookupError):
            # UnicodeError covers both decode and encode failures; LookupError
            # covers a detected encoding name that Python has no codec for.
            consistent = False
         if not consistent:
            logger.error("Inconsistent encoding for [%s]: got %s, but need %s", entry, detected, encoding)
            failed = True

   if not failed:
      logger.info("Completed checking source filename encoding (no problems found).")
   else:
      logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
      logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
      logger.error("Alternately, you can rename the problem files to be valid in the indicated locale.")
      logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
      raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1073 1074 1075 ################################ 1076 # _synchronizeBucket() function 1077 ################################ 1078
def _synchronizeBucket(sourceDir, s3BucketUrl):
   """
   Pushes a local directory up to an Amazon S3 bucket.

   Invokes C{AWS_COMMAND} with the arguments C{s3 sync <sourceDir>
   <s3BucketUrl> --delete --recursive} and checks the returned status.

   @param sourceDir: Local source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the command returns a non-zero status
   """
   logger.info("Synchronizing local source directory up to Amazon S3.")
   command = [ "s3", "sync", ]
   command.extend([ sourceDir, s3BucketUrl, ])
   command.extend([ "--delete", "--recursive", ])
   outcome = executeCommand(AWS_COMMAND, command, returnOutput=False)
   status = outcome[0]
   if status == 0:
      return
   raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
1090 1091 1092 ################################### 1093 # _verifyBucketContents() function 1094 ################################### 1095
def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
   """
   Verify that a source directory is equivalent to an Amazon S3 bucket.

   The bucket is listed with C{s3api list-objects}, and every regular file in
   C{sourceFiles} is then looked up in that listing by its path relative to
   C{sourceDir}.  A file is reported when it is missing from the listing or
   when its size on disk differs from the size in the listing.  Objects that
   exist only in the bucket are not reported.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   @raise ValueError: If any source file is missing from the bucket or differs in size
   """
   # As of this writing, the documentation for the S3 API that we're using
   # below says that up to 1000 elements at a time are returned, and that we
   # have to manually handle pagination by looking for the IsTruncated element.
   # However, in practice, this is not true.  I have been testing with
   # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
   # No matter how many items exist in my bucket and prefix, I get back a
   # single JSON result.  I've tested with buckets containing nearly 6000
   # elements.
   #
   # If I turn on debugging, it's clear that underneath, something in the API
   # is executing multiple list-object requests against AWS, and stitching
   # results together to give me back the final JSON result.  The debug output
   # clearly includes multiple requests, and each XML response (except for the
   # final one) contains <IsTruncated>true</IsTruncated>.
   #
   # This feature is not mentioned in the official changelog for any of the
   # releases going back to 1.0.0.  It appears to happen in the botocore
   # library, but I'll admit I can't actually find the code that implements it.
   # For now, all I can do is rely on this behavior and hope that the
   # documentation is out-of-date.  I'm not going to write code that tries to
   # parse out IsTruncated if I can't actually test that code.

   (bucket, prefix) = _splitBucketUrl(s3BucketUrl)

   query = "Contents[].{Key: Key, Size: Size}"
   args = [ "s3api", "list-objects", "--bucket", bucket, ]
   if prefix:
      # A bucket-only URL has no prefix, so list the whole bucket
      args += [ "--prefix", prefix, ]
   args += [ "--query", query, ]
   (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

   # Blank output or a JSON null means that nothing matched; treat that as an
   # empty listing so each source file is reported as missing below.
   output = "".join(data).strip()
   listing = json.loads(output) if output else None

   contents = { }
   for entry in listing or []:
      key = _stripLeading(entry["Key"], prefix)
      contents[key] = long(entry["Size"])

   failed = False
   for entry in sourceFiles:
      if os.path.isfile(entry):
         key = _stripLeading(entry, sourceDir)
         size = long(os.stat(entry).st_size)
         if key not in contents:
            logger.error("File was apparently not uploaded: [%s]", entry)
            failed = True
         elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
            failed = True

   if not failed:
      logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
   else:
      logger.error("There were differences between source directory and target S3 bucket.")
      raise ValueError("There were differences between source directory and target S3 bucket.")


def _splitBucketUrl(s3BucketUrl):
   """
   Split an S3 URL into its bucket name and key prefix.
   @param s3BucketUrl: URL like C{s3://bucket/prefix} or C{s3://bucket}
   @return: Tuple of (bucket, prefix); prefix is empty if the URL names only a bucket
   """
   scheme = "s3://"
   location = s3BucketUrl[len(scheme):] if s3BucketUrl.startswith(scheme) else s3BucketUrl
   (bucket, _, prefix) = location.partition("/")
   return (bucket, prefix)


def _stripLeading(name, root):
   """
   Make a name relative to a root by removing the root from the front only.
   @param name: Full S3 key or local path
   @param root: Leading portion to remove (may be empty)
   @return: Name without the leading root and without leading slashes
   """
   if root and name.startswith(root):
      name = name[len(root):]
   # Dropping leading slashes keeps local and remote names comparable whether
   # or not the root was given with a trailing slash.
   return name.lstrip("/")


#########################################################################
# Main routine
########################################################################

# When executed as a script, run the command-line interface and hand its
# result to sys.exit() as the process exit status.
if __name__ == "__main__":
   exitStatus = cli()
   sys.exit(exitStatus)
