Diffstat (limited to 'tools/scripts/divide_and_compress.py')
| -rwxr-xr-x | tools/scripts/divide_and_compress.py | 240 |
1 file changed, 127 insertions, 113 deletions
diff --git a/tools/scripts/divide_and_compress.py b/tools/scripts/divide_and_compress.py
index d369be4a4..2bcb0ab67 100755
--- a/tools/scripts/divide_and_compress.py
+++ b/tools/scripts/divide_and_compress.py
@@ -36,89 +36,99 @@ from.
 __author__ = 'jmatt@google.com (Justin Mattson)'

-from optparse import OptionParser
+import optparse
 import os
 import stat
 import sys
 import zipfile
-from zipfile import ZipFile

 import divide_and_compress_constants


-def Main(argv):
-  parser = CreateOptionsParser()
-  (options, args) = parser.parse_args()
-  VerifyArguments(options, parser)
-  zipper = DirectoryZipper(options.destination,
-                           options.sourcefiles,
-                           ParseSize(options.filesize),
-                           options.compress)
-  zipper.StartCompress()
-
-
 def CreateOptionsParser():
-  rtn = OptionParser()
+  """Creates the parser for command line arguments.
+
+  Returns:
+    A configured optparse.OptionParser object.
+  """
+  rtn = optparse.OptionParser()
   rtn.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
                  help='The directory containing the files to compress')
   rtn.add_option('-d', '--destination', dest='destination', default=None,
                  help=('Where to put the archive files, this should not be'
                        ' a child of where the source files exist.'))
   rtn.add_option('-f', '--filesize', dest='filesize', default='1M',
-                 help=('Maximum size of archive files. A number followed by'
-                       'a magnitude indicator, eg. 1000000B == one million '
-                       'BYTES, 500K == five hundred KILOBYTES, 1.2M == one '
-                       'point two MEGABYTES. 1M == 1048576 BYTES'))
+                 help=('Maximum size of archive files. A number followed by '
+                       'a magnitude indicator either "B", "K", "M", or "G". '
+                       'Examples:\n 1000000B == one million BYTES\n'
+                       ' 1.2M == one point two MEGABYTES\n'
+                       ' 1M == 1048576 BYTES'))
   rtn.add_option('-n', '--nocompress', action='store_false', dest='compress',
-                 default=True,
+                 default=True,
                  help=('Whether the archive files should be compressed, or '
                        'just a concatenation of the source files'))
   return rtn


 def VerifyArguments(options, parser):
+  """Runs simple checks on correctness of commandline arguments.
+
+  Args:
+    options: The command line options passed.
+    parser: The parser object used to parse the command string.
+  """
   try:
     if options.sourcefiles is None or options.destination is None:
       parser.print_help()
       sys.exit(-1)
-  except (AttributeError), err:
+  except AttributeError:
     parser.print_help()
     sys.exit(-1)


 def ParseSize(size_str):
+  """Parse the file size argument from a string to a number of bytes.
+
+  Args:
+    size_str: The string representation of the file size.
+
+  Returns:
+    The file size in bytes.
+
+  Raises:
+    ValueError: Raises an error if the numeric or qualifier portions of the
+      file size argument is invalid.
+  """
   if len(size_str) < 2:
     raise ValueError(('filesize argument not understood, please include'
                       ' a numeric value and magnitude indicator'))
-  magnitude = size_str[len(size_str)-1:]
-  if not magnitude in ('K', 'B', 'M'):
-    raise ValueError(('filesize magnitude indicator not valid, must be \'K\','
-                      '\'B\', or \'M\''))
-  numeral = float(size_str[0:len(size_str)-1])
+  magnitude = size_str[-1]
+  if not magnitude in ('B', 'K', 'M', 'G'):
+    raise ValueError(('filesize magnitude indicator not valid, must be "B",'
+                      '"K","M", or "G"'))
+  numeral = float(size_str[:-1])
   if magnitude == 'K':
     numeral *= 1024
   elif magnitude == 'M':
     numeral *= 1048576
+  elif magnitude == 'G':
+    numeral *= 1073741824
   return int(numeral)


 class DirectoryZipper(object):
-  """Class to compress a directory and all its sub-directories."""
-  current_archive = None
-  output_dir = None
-  base_path = None
-  max_size = None
-  compress = None
-  index_fp = None
+  """Class to compress a directory and all its sub-directories."""

   def __init__(self, output_path, base_dir, archive_size, enable_compression):
     """DirectoryZipper constructor.

     Args:
-      output_path: the path to write the archives and index file to
-      base_dir: the directory to compress
-      archive_size: the maximum size, in bytes, of a single archive file
-      enable_compression: whether or not compression should be enabled, if
-        disabled, the files will be written into an uncompresed zip
+      output_path: A string, the path to write the archives and index file to.
+      base_dir: A string, the directory to compress.
+      archive_size: An number, the maximum size, in bytes, of a single
+        archive file.
+      enable_compression: A boolean, whether or not compression should be
+        enabled, if disabled, the files will be written into an uncompresed
+        zip.
     """
     self.output_dir = output_path
     self.current_archive = '0.zip'
@@ -126,6 +136,9 @@ class DirectoryZipper(object):
     self.max_size = archive_size
     self.compress = enable_compression

+    # Set index_fp to None, because we don't know what it will be yet.
+    self.index_fp = None
+
   def StartCompress(self):
     """Start compress of the directory.

@@ -133,7 +146,7 @@
     specified output directory. It will also produce an 'index.txt' file in the
     output directory that maps from file to archive.
     """
-    self.index_fp = open(''.join([self.output_dir, 'main.py']), 'w')
+    self.index_fp = open(os.path.join(self.output_dir, 'main.py'), 'w')
     self.index_fp.write(divide_and_compress_constants.file_preamble)
     os.path.walk(self.base_path, self.CompressDirectory, 1)
     self.index_fp.write(divide_and_compress_constants.file_endpiece)
@@ -149,37 +162,32 @@
     Args:
       archive_path: Path to the archive to modify. This archive should not be
        open elsewhere, since it will need to be deleted.
-    Return:
-      A new ZipFile object that points to the modified archive file
+
+    Returns:
+      A new ZipFile object that points to the modified archive file.
     """
     if archive_path is None:
-      archive_path = ''.join([self.output_dir, self.current_archive])
+      archive_path = os.path.join(self.output_dir, self.current_archive)

-    # Move the old file and create a new one at its old location
-    ext_offset = archive_path.rfind('.')
-    old_archive = ''.join([archive_path[0:ext_offset], '-old',
-                           archive_path[ext_offset:]])
+    # Move the old file and create a new one at its old location.
+    root, ext = os.path.splitext(archive_path)
+    old_archive = ''.join([root, '-old', ext])
     os.rename(archive_path, old_archive)
     old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
+    # By default, store uncompressed.
+    compress_bit = zipfile.ZIP_STORED
     if self.compress:
-      new_fp = self.OpenZipFileAtPath(archive_path,
-                                      mode='w',
-                                      compress=zipfile.ZIP_DEFLATED)
-    else:
-      new_fp = self.OpenZipFileAtPath(archive_path,
-                                      mode='w',
-                                      compress=zipfile.ZIP_STORED)
-
-    # Read the old archive in a new archive, except the last one
-    zip_members = enumerate(old_fp.infolist())
-    num_members = len(old_fp.infolist())
-    while num_members > 1:
-      this_member = zip_members.next()[1]
-      new_fp.writestr(this_member.filename, old_fp.read(this_member.filename))
-      num_members -= 1
-
-    # Close files and delete the old one
+      compress_bit = zipfile.ZIP_DEFLATED
+    new_fp = self.OpenZipFileAtPath(archive_path,
+                                    mode='w',
+                                    compress=compress_bit)
+
+    # Read the old archive in a new archive, except the last one.
+    for zip_member in old_fp.infolist()[:-1]:
+      new_fp.writestr(zip_member, old_fp.read(zip_member.filename))
+
+    # Close files and delete the old one.
     old_fp.close()
     new_fp.close()
     os.unlink(old_archive)
@@ -193,11 +201,11 @@ class DirectoryZipper(object):
       mode = 'w'
     if mode == 'r':
-      return ZipFile(path, mode)
+      return zipfile.ZipFile(path, mode)
     else:
-      return ZipFile(path, mode, compress)
+      return zipfile.ZipFile(path, mode, compress)

-  def CompressDirectory(self, irrelevant, dir_path, dir_contents):
+  def CompressDirectory(self, unused_id, dir_path, dir_contents):
     """Method to compress the given directory.

     This method compresses the directory 'dir_path'. It will add to an existing
@@ -206,40 +214,35 @@
     mapping of files to archives to the self.index_fp file descriptor

     Args:
-      irrelevant: a numeric identifier passed by the os.path.walk method, this
-        is not used by this method
-      dir_path: the path to the directory to compress
-      dir_contents: a list of directory contents to be compressed
+      unused_id: A numeric identifier passed by the os.path.walk method, this
+        is not used by this method.
+      dir_path: A string, the path to the directory to compress.
+      dir_contents: A list of directory contents to be compressed.
     """
-
-    # construct the queue of files to be added that this method will use
+    # Construct the queue of files to be added that this method will use
     # it seems that dir_contents is given in reverse alphabetical order,
-    # so put them in alphabetical order by inserting to front of the list
+    # so put them in alphabetical order by inserting to front of the list.
     dir_contents.sort()
     zip_queue = []
-    if dir_path[len(dir_path) - 1:] == os.sep:
-      for filename in dir_contents:
-        zip_queue.append(''.join([dir_path, filename]))
-    else:
-      for filename in dir_contents:
-        zip_queue.append(''.join([dir_path, os.sep, filename]))
+    for filename in dir_contents:
+      zip_queue.append(os.path.join(dir_path, filename))
     compress_bit = zipfile.ZIP_DEFLATED
     if not self.compress:
       compress_bit = zipfile.ZIP_STORED

-    # zip all files in this directory, adding to existing archives and creating
-    # as necessary
-    while len(zip_queue) > 0:
+    # Zip all files in this directory, adding to existing archives and creating
+    # as necessary.
+    while zip_queue:
       target_file = zip_queue[0]
       if os.path.isfile(target_file):
         self.AddFileToArchive(target_file, compress_bit)
-
-        # see if adding the new file made our archive too large
+
+        # See if adding the new file made our archive too large.
         if not self.ArchiveIsValid():
-
+
           # IF fixing fails, the last added file was to large, skip it
           # ELSE the current archive filled normally, make a new one and try
-          # adding the file again
+          # adding the file again.
           if not self.FixArchive('SIZE'):
             zip_queue.pop(0)
           else:
@@ -248,7 +251,7 @@
                     0:self.current_archive.rfind('.zip')]) + 1)
         else:

-          # if this the first file in the archive, write an index record
+          # Write an index record if necessary.
           self.WriteIndexRecord()
           zip_queue.pop(0)
       else:
@@ -260,10 +263,10 @@
     Only write an index record if this is the first file to go into archive

     Returns:
-      True if an archive record is written, False if it isn't
+      True if an archive record is written, False if it isn't.
     """
     archive = self.OpenZipFileAtPath(
-        ''.join([self.output_dir, self.current_archive]), 'r')
+        os.path.join(self.output_dir, self.current_archive), 'r')
     archive_index = archive.infolist()
     if len(archive_index) == 1:
       self.index_fp.write(
@@ -279,54 +282,56 @@
     """Make the archive compliant.

     Args:
-      problem: the reason the archive is invalid
+      problem: An enum, the reason the archive is invalid.

     Returns:
       Whether the file(s) removed to fix the archive could conceivably be in
       an archive, but for some reason can't be added to this one.
     """
-    archive_path = ''.join([self.output_dir, self.current_archive])
-    rtn_value = None
-
+    archive_path = os.path.join(self.output_dir, self.current_archive)
+    return_value = None
+
     if problem == 'SIZE':
       archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
       num_archive_files = len(archive_obj.infolist())
-
+
       # IF there is a single file, that means its too large to compress,
       # delete the created archive
-      # ELSE do normal finalization
+      # ELSE do normal finalization.
       if num_archive_files == 1:
         print ('WARNING: %s%s is too large to store.' % (
             self.base_path, archive_obj.infolist()[0].filename))
         archive_obj.close()
         os.unlink(archive_path)
-        rtn_value = False
+        return_value = False
       else:
-        self.RemoveLastFile(''.join([self.output_dir, self.current_archive]))
         archive_obj.close()
+        self.RemoveLastFile(
+            os.path.join(self.output_dir, self.current_archive))
         print 'Final archive size for %s is %i' % (
-            self.current_archive, os.stat(archive_path)[stat.ST_SIZE])
-        rtn_value = True
-    return rtn_value
+            self.current_archive, os.path.getsize(archive_path))
+        return_value = True
+    return return_value

   def AddFileToArchive(self, filepath, compress_bit):
     """Add the file at filepath to the current archive.

     Args:
-      filepath: the path of the file to add
-      compress_bit: whether or not this fiel should be compressed when added
+      filepath: A string, the path of the file to add.
+      compress_bit: A boolean, whether or not this file should be compressed
+        when added.

     Returns:
       True if the file could be added (typically because this is a file) or
-      False if it couldn't be added (typically because its a directory)
+      False if it couldn't be added (typically because its a directory).
     """
-    curr_archive_path = ''.join([self.output_dir, self.current_archive])
-    if os.path.isfile(filepath):
-      if os.stat(filepath)[stat.ST_SIZE] > 1048576:
+    curr_archive_path = os.path.join(self.output_dir, self.current_archive)
+    if os.path.isfile(filepath) and not os.path.islink(filepath):
+      if os.path.getsize(filepath) > 1048576:
        print 'Warning: %s is potentially too large to serve on GAE' % filepath
      archive = self.OpenZipFileAtPath(curr_archive_path,
                                       compress=compress_bit)
-      # add the file to the archive
+      # Add the file to the archive.
      archive.write(filepath, filepath[len(self.base_path):])
      archive.close()
      return True
@@ -340,13 +345,22 @@
     The thought is that eventually this will do additional validation

     Returns:
-      True if the archive is valid, False if its not
+      True if the archive is valid, False if its not.
     """
-    archive_path = ''.join([self.output_dir, self.current_archive])
-    if os.stat(archive_path)[stat.ST_SIZE] > self.max_size:
-      return False
-    else:
-      return True
+    archive_path = os.path.join(self.output_dir, self.current_archive)
+    return os.path.getsize(archive_path) <= self.max_size
+
+
+def main(argv):
+  parser = CreateOptionsParser()
+  (options, unused_args) = parser.parse_args(args=argv[1:])
+  VerifyArguments(options, parser)
+  zipper = DirectoryZipper(options.destination,
+                           options.sourcefiles,
+                           ParseSize(options.filesize),
+                           options.compress)
+  zipper.StartCompress()
+

 if __name__ == '__main__':
-  Main(sys.argv)
+  main(sys.argv)
