#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# fillmedia.py
#
# Based on "Optimally filling up storage - to the rim (Dynamic Programming)" by Thanassis Tsiodras
# http://users.softlab.ece.ntua.gr/~ttsiod/fillupDVD.html
#
# 2012 Miguel Ángel Molina
#
# NOTE: every print call below takes exactly one argument, so the script
# runs unchanged on both Python 2 and Python 3.
#

##
# imports
#
import getopt
import sys
import os
from operator import itemgetter
from itertools import combinations
from time import time
import shutil

##
# some global vars
#
__version__ = '0.1'
__author__ = u'Miguel Ángel Molina (sslapp@gmail.com)'

# capacities in bytes by media types allowed
capacities = {'cd-650': 681984000.0,
              'cd-700': 737280000.0,
              'cd-800': 829440000.0,
              'cd-900': 912384000.0,
              'dvd-r-sl': 4707319808.0,
              'dvd+r-sl': 4700372992.0,
              'dvd-r-dl': 8543666176.0,
              'dvd+r-dl': 8547991552.0}

units = {'kB': 10.0 ** 3,
         'KiB': 2.0 ** 10,
         'MB': 10.0 ** 6,
         'MiB': 2.0 ** 20}

# on-media sector size (2 KiB) and per-file file system entry allowance
_SECTOR_BYTES = 2048
_FS_ENTRY_BYTES = 1024

##
# defaults
#
# no logging
logging = False
# no dry run
dry = False
sourcedir = None
# destination directory = source directory
destdir = None
media = None
size = None
# default unit Megabyte
unitname = 'MB'
unit = units[unitname]
scale = 1.0
# effective unit
efunit = unit * scale
# default threshold 1% of wasted space
threshold = 1.0


def version():
    """Show author and version"""
    print("{0:s}, version: {1:s}".format(os.path.basename(sys.argv[0]),
                                         __version__))
    print(u"Author: {0:s}".format(__author__))


def usage():
    """Show usage instructions"""
    print("""Usage: fillmedia.py [-h|--help]|[-v|--version]|[-t|--types]|-i source.dir|--indir=source.dir [-o dest.dir|--outdir=dest.dir] -m media.type|--media=media.type [-u unit|--unit=unit][-s scale|--scale=scale][-l limit|--limit=limit][-d|--dry][-r log.file|--reg=log.file]

    -h | --help                             Show this help and exit
    -v | --version                          Show author & version and exit
    -t | --types                            Show media types & units allowed and exit
    -i source.dir | --indir=source.dir      Source directory
    -o dest.dir | --outdir=dest.dir         Destination directory (default = source.dir)
    -m media | --media=media                Media type to fill
    -u unit | --unit=unit                   Unit of measurement (default = 'MB')
    -s scale | --scale=scale                Unit scale (default = 1.0)
    -l limit | --limit=limit                Percent threshold to stop processing (default = 1)
    -d | --dry                              Do nothing on disk, dry-run
    -r log.file | --reg=log.file            Redirects output to log.file
""")


def show():
    """Show media types and units allowed"""
    print("Media types allowed:")
    print("----------------------")
    for key, capacity in sorted(capacities.items(), key=itemgetter(1)):
        print("{0: >10s}: {1: >8.2f} MB".format(key, capacity / units['MB']))
    print("")
    print("Units allowed:")
    print("----------------")
    for key, value in sorted(units.items(), key=itemgetter(1)):
        print("{0: >10s}: {1: >8.0f} Bytes".format(key, value))


def _working_size(realfilesize, unitsize):
    """Return the size of a file expressed in whole working units.

    realfilesize -- file size in bytes
    unitsize     -- bytes per working unit (the effective unit)

    The byte size is first padded to the sector grid (one sector more than
    the number of complete sectors) plus the file system entry allowance,
    then converted to units (one unit more than the number of complete
    units).  Explicit integer arithmetic is used instead of
    round(x + 0.5), whose result differs between Python 2 and Python 3;
    the values match what the Python 2 expression produced, i.e. the
    estimate is always on the conservative side.
    """
    ondisk = (realfilesize // _SECTOR_BYTES + 1) * _SECTOR_BYTES
    ondisk += _FS_ENTRY_BYTES
    return int(ondisk / float(unitsize)) + 1


def _best_fit(sizes, capacity, progress=False):
    """Solve the 0/1 knapsack "fill the container as full as possible".

    sizes    -- working sizes (positive integers), one per file
    capacity -- container size in the same working units
    progress -- when True, write a percentage indicator to stdout

    Returns (total, chosen): the largest attainable sum not exceeding
    capacity and the list of sizes that make up that sum.
    """
    # best[room] = largest sum attainable within "room" units using the
    # files seen so far; last[room] = size of the file added last to
    # reach that sum.
    best = [0] * (capacity + 1)
    last = [0] * (capacity + 1)
    count = len(sizes)
    for done, filesize in enumerate(sizes):
        if progress:
            sys.stdout.write("{0:3.2f}% complete...".format(
                done * 100.0 / count) + ' ' * 30 + '\r')
            sys.stdout.flush()
        # Walk downwards so best[room - filesize] still refers to the
        # state *without* the current file (each file is used at most once).
        for room in range(capacity, filesize - 1, -1):
            candidate = best[room - filesize] + filesize
            # strict improvement only: ties keep the earlier solution
            if candidate > best[room]:
                best[room] = candidate
                last[room] = filesize
    # terminate the progress line
    print("")

    # Walk the build path in reverse order to get the sizes involved.
    total = best[capacity]
    chosen = []
    remaining = total
    while remaining > 0:
        step = last[remaining]
        if step <= 0:
            # cannot happen for a consistent table; avoid looping forever
            break
        chosen.append(step)
        remaining -= step
    return total, chosen


def do_fit():
    """Calculate best fit of media selected

    Reads the module-level settings (sourcedir, destdir, size, efunit,
    unit, unitname, threshold, dry, logging).  Regular files directly
    inside sourcedir are grouped, disk by disk, into the subsets that best
    fill the media; each subset is moved to destdir/disk<N> unless dry is
    set.  Processing stops when no files are left, when nothing fits, or
    when a disk would waste more than threshold percent of the media.
    """
    print("Starting...")
    print("Unit selected: {0:.2f} {1:s}".format(efunit / unit, unitname))
    print("Threshold: {0:3.2f}%".format(threshold))
    sys.stdout.write("Getting files... ")
    # files to be processed: [full path, working size, real size in bytes]
    listoffiles = []
    # sorted so the resulting disk layout is reproducible between runs
    for filename in sorted(os.listdir(sourcedir)):
        # file full path
        filename = os.path.join(sourcedir, filename)
        if os.path.isfile(filename):
            # file real size in bytes
            realfilesize = os.path.getsize(filename)
            listoffiles.append([filename,
                                _working_size(realfilesize, efunit),
                                realfilesize])
    print("Done!")

    mediasize = int(size / efunit)
    disknumber = 0
    efficiency = 0
    useddisks = 0

    # Process files
    while listoffiles:
        disknumber += 1
        print("")
        print("Disk number: {0:n}".format(disknumber))
        print("-" * 20)
        sys.stdout.write("Extracting file sizes... ")
        # isolate working file sizes
        listofsizes = [fileitem[1] for fileitem in listoffiles]
        print("Done!")
        print("Processing: {0:n} files".format(len(listofsizes)))

        total, chosensizes = _best_fit(listofsizes, mediasize, not logging)
        attainable = total * efunit
        print("Objective: {0:.2f} Bytes".format(size))
        print("Attainable: {0:.2f} Bytes".format(attainable))
        if total == 0:
            print("No file fits on media, aborting...")
            break

        # map every chosen working size back to a concrete file
        finalchosenlist = []
        realsize = 0
        for chosensize in chosensizes:
            for fileitem in listoffiles:
                # fileitem[0] = full path filename
                # fileitem[1] = working file size
                # fileitem[2] = real file size in bytes
                if fileitem[1] == chosensize:
                    finalchosenlist.append(fileitem[0])
                    realsize += fileitem[2]
                    # remove the file from the list of files; safe because
                    # the scan stops right after the removal
                    listoffiles.remove(fileitem)
                    break
        fileschoosed = len(finalchosenlist)

        # calculate percent real wasted space on media
        wasted = 100.0 - (realsize * 100.0 / size)
        print("Real size: {0:.2f} Bytes".format(realsize))
        print("Wasted space: {0:3.2f}%".format(wasted))
        print("")
        if wasted > threshold:
            print("Threshold reached.")
            break

        print("Disk number: {0:n} contains: {1:n} files".format(disknumber,
                                                                fileschoosed))
        sys.stdout.write("Moving files to: ")
        diskdir = os.path.join(destdir, 'disk' + str(disknumber))
        if not dry and not os.path.isdir(diskdir):
            try:
                os.mkdir(diskdir)
            except OSError:
                # Never move files when the target directory is missing:
                # shutil.move would rename each file to the "diskN" path
                # itself, one overwriting the other.
                print("Error! Can't create: {0:s}".format(diskdir))
                break
        print(diskdir)
        print("")
        for final in finalchosenlist:
            if not dry:
                try:
                    shutil.move(final, diskdir)
                except EnvironmentError:
                    # covers IOError, OSError and shutil.Error
                    print("Error! Can't move: {0:s}".format(final))
                    continue
            print(final)
        efficiency += wasted
        useddisks += 1
    else:
        print("No more files left")

    print("")
    if useddisks:
        print("Total efficiency: {0:3.2f}%".format(
            100.0 - (efficiency / useddisks)))
    else:
        print("No disks generated!")
    print("")
    return


def main():
    """main

    Parse the command line, store the settings in the module globals and
    run do_fit().  Returns 0 on success, 2 on a malformed command line and
    -1 on an invalid or missing option value.
    """
    global sourcedir, destdir, media, size, dry, unitname, unit, efunit, \
        scale, threshold, logging
    # Parse command line options
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hvti:o:m:u:s:l:dr:',
                                   ['help', 'version', 'types', 'indir=',
                                    'outdir=', 'media=', 'unit=', 'scale=',
                                    'limit=', 'dry', 'reg='])
    except getopt.GetoptError as err:
        print("Error: {0:s}".format(err.msg))
        usage()
        return 2

    logfile = None
    previousstdout = sys.stdout
    try:
        for o, a in opts:
            if o in ('-h', '--help'):
                usage()
                return 0
            elif o in ('-v', '--version'):
                version()
                return 0
            elif o in ('-t', '--types'):
                show()
                return 0
            elif o in ('-i', '--indir'):
                if os.path.isdir(a):
                    sourcedir = a
                else:
                    print("{0:s} is not a valid directory".format(a))
                    return -1
            elif o in ('-o', '--outdir'):
                if os.path.isdir(a):
                    destdir = a
                else:
                    print("{0:s} is not a valid directory".format(a))
                    return -1
            elif o in ('-m', '--media'):
                if a in capacities:
                    media = a
                    size = capacities[media]
                else:
                    print("{0:s} is not a valid media type".format(a))
                    show()
                    return -1
            elif o in ('-u', '--unit'):
                if a in units:
                    unitname = a
                    unit = units[unitname]
                else:
                    print("{0:s} is not a valid unit".format(a))
                    show()
                    return -1
            elif o in ('-s', '--scale'):
                try:
                    scale = float(a)
                    if scale <= 0:
                        print("Scale must be a positive number greater than 0. {0:s} given".format(a))
                        return -1
                except ValueError:
                    print("{0:s} is not a valid number".format(a))
                    show()
                    return -1
            elif o in ('-l', '--limit'):
                try:
                    threshold = float(a)
                    if threshold < 0 or threshold > 100:
                        print("Threshold must be between 0 and 100. {0:s} given".format(a))
                        return -1
                except ValueError:
                    print("{0:s} is not a valid number".format(a))
                    return -1
            elif o in ('-d', '--dry'):
                dry = True
            elif o in ('-r', '--reg'):
                try:
                    newlog = open(a, 'w')
                except IOError:
                    print("Can't open log file: {0:s}".format(a))
                    return -1
                if logfile is not None:
                    # option given more than once: keep only the last file
                    logfile.close()
                logfile = newlog
                sys.stdout = logfile
                logging = True

        efunit = unit * scale
        errors = False
        if sourcedir is None:
            print("You must specify the source directory, use -i or --indir option")
            errors = True
        if destdir is None:
            destdir = sourcedir
        if media is None:
            print("You must specify the media type, use -m or --media option.")
            usage()
            show()
            errors = True
        if errors:
            return -1
        if dry:
            print("Dry run. Changes will not be written on disk.")

        timestart = time()
        do_fit()
        timeend = time()
        elapsedtime = timeend - timestart
        hours = int(elapsedtime / (60 * 60))
        minutes = int(elapsedtime / 60) - hours * 60
        seconds = int(elapsedtime % 60)
        miliseconds = int((elapsedtime - int(elapsedtime)) * 1000)
        print("Elapsed time: {0:2n}h {1:2n}m {2:2n}s {3:2n}ms".format(
            hours, minutes, seconds, miliseconds))
        print("")
        return 0
    finally:
        # reset stdout and release the log file on every exit path
        sys.stdout = previousstdout
        if logfile is not None:
            logfile.close()


if __name__ == '__main__':
    sys.exit(main())