#!/usr/bin/env python
# Copyright 2008 ETH Zuerich, CISD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

##
# Removes spurious data set directory and moves its content to appropriate data set directory.
#
# @author: Izabela Adamczyk
##

import os
import sys
import string
import shutil
from optparse import OptionParser

# When False, info() prints nothing; warnings and errors are always printed.
SHOW_INFO = True

SAMPLE_PREFIX = 'Sample_'
DATASET_PREFIX = 'Dataset_'
PROJECT_PREFIX = 'Project_'

# Directory names that are never reported as spurious by isSpuriousDatasetDir().
_NON_SPURIOUS_NAMES = ("version", "data", "metadata", "annotations")


def extractProjectPath(path):
    """
    Returns the part of 'path' starting at the first occurrence of PROJECT_PREFIX.

    If PROJECT_PREFIX does not occur in 'path', the path is returned unchanged
    (a plain slice with the "not found" index -1 would return only the last character).
    """
    index = path.find(PROJECT_PREFIX)
    if index < 0:
        return path
    return path[index:]


class DataSet:
    """
    Small class which defines a data set, composed of a code and a location.
    Uniqueness of code and location is expected by the script but not enforced here.
    """

    def __init__(self, code, location):
        self.code = code
        self.location = location

    def __str__(self):
        # One line of the mapping file: <code> TAB <location>
        return self.code + "\t" + self.location


def checkDirectory(path):
    """
    Stops execution of the script (exit code 1) if 'path' is not an existing directory.
    """
    if os.path.isdir(path):
        info("Processing directory " + path)
    else:
        fatalError("Directory not found: " + path)


def checkDoesNotExist(file):
    """
    Stops execution of the script (exit code 1) if the given path already exists.
    """
    if os.path.exists(file):
        fatalError(file + " already exist, won't overwrite.")


def warning(message):
    """
    Prints a warning message to standard output.
    """
    print("WARNING: %s" % (message,))


def isSpuriousDatasetDir(directory):
    """
    Whether given directory is a spurious data set directory or not.

    A directory is spurious when its parent's name starts with DATASET_PREFIX,
    its grandparent's name starts with SAMPLE_PREFIX, and its own name is not
    one of "version", "data", "metadata" or "annotations".
    Only the path string is inspected; the file system is not touched.
    """
    parent = os.path.dirname(directory)
    grandParent = os.path.dirname(parent)
    return (os.path.basename(directory) not in _NON_SPURIOUS_NAMES
            and os.path.basename(grandParent).startswith(SAMPLE_PREFIX)
            and os.path.basename(parent).startswith(DATASET_PREFIX))


def writeMappingFile(dataSets, mappingFile):
    """
    Writes out the mapping file with given code-location mappings, one data set per line.

    Nothing is written (and the file is not created) when 'dataSets' is empty.
    """
    if not dataSets:
        info("No data set to update - location mapping file has not been created.")
        return
    with open(mappingFile, "w") as writer:
        for dataSet in dataSets:
            writer.write("%s\n" % (dataSet,))


def move(src, dst):
    """
    Logs and performs the move of 'src' to 'dst' using shutil.move.
    """
    info("mv" + " " + src + " " + dst)
    shutil.move(src, dst)


def remove(path):
    """
    Logs and removes the directory 'path'. Uses os.rmdir, so the directory must be empty.
    """
    info("rm " + path)
    os.rmdir(path)


def fatalError(message):
    """
    Prints the error message prefixed with "ERROR:" and exits with status 1.
    """
    print("ERROR: %s" % (message,))
    sys.exit(1)


def info(message):
    """
    If SHOW_INFO is true, prints the given message.
    """
    if SHOW_INFO:
        print(message)


def dirWalk(directory, maxLevel=10):
    """
    Walks a directory tree, using a generator, and yields every sub directory
    for which isSpuriousDatasetDir() is true.

    'maxLevel' limits the recursion depth; at level 0 nothing is yielded.
    The consumer may delete a yielded directory before resuming the generator:
    the walk re-checks existence before descending into it.
    """
    if maxLevel == 0:
        return
    for subDirName in os.listdir(directory):
        subDirectory = os.path.join(directory, subDirName)
        if not os.path.isdir(subDirectory):
            continue
        if isSpuriousDatasetDir(subDirectory):
            yield subDirectory
        # The consumer may have removed the directory while the generator was suspended.
        if not os.path.exists(subDirectory):
            continue
        for nested in dirWalk(subDirectory, maxLevel - 1):
            yield nested


def _moveContentToDataSetDir(dirpath, storeRoot):
    """
    Moves every sub directory of the spurious directory 'dirpath' one level up
    into its parent data set directory and returns the matching DataSet.

    Raises ValueError if the data set directory name has no '_' separator, and
    OSError/IOError/shutil.Error if listing or moving fails.
    """
    dirnames = os.listdir(dirpath)
    datasetDir = os.path.dirname(dirpath)
    # The directory name is <prefix>_<code>; only the code is stored.
    _prefix, code = os.path.basename(datasetDir).split("_", 1)
    # Location is the data set directory relative to the store root.
    dataSet = DataSet(code, datasetDir[len(storeRoot) + 1:])
    for dirname in dirnames:
        src = os.path.abspath(os.path.join(dirpath, dirname))
        # Only directories are moved; plain files stay where they are.
        if not os.path.isdir(src):
            continue
        dst = os.path.abspath(os.path.join(datasetDir, dirname))
        move(src, dst)
    return dataSet


def main():
    """
    Main method.

    Expects two command line arguments: the instance directory to process and
    the path of the mapping file to create (must not exist yet). Moves the
    content of all spurious data set directories one level up, writes the
    code-location mapping file and finally removes the spurious directories.
    """
    parser = OptionParser("usage: %prog <instance directory> <mapping file>")
    (_options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)
    instanceName, mappingFile = args
    # NOTE: os.path.dirname treats a trailing separator as part of the path, so
    # "store/instance/" yields a different store root than "store/instance".
    storeRoot = os.path.dirname(instanceName)

    info("Moving data sets started.")
    checkDirectory(instanceName)
    checkDoesNotExist(mappingFile)

    dirsToRemove = []
    dataSets = []
    numberOfProblems = 0
    for dirpath in dirWalk(instanceName, 8):
        try:
            dataSet = _moveContentToDataSetDir(dirpath, storeRoot)
        except (ValueError, EnvironmentError) as e:
            # EnvironmentError covers OSError, IOError and shutil.Error, i.e. the
            # failures a move can actually produce; keep going with the next one.
            numberOfProblems += 1
            warning("Problem with moving data set " + dirpath)
            print(e)
        else:
            dataSets.append(dataSet)
            dirsToRemove.append(os.path.abspath(dirpath))
            info("Successfully moved " + dirpath)

    writeMappingFile(dataSets, mappingFile)
    if numberOfProblems > 0:
        info(numberOfProblems)

    # Removal is deferred until the mapping file has been written.
    info("Removing all spurious directories.")
    for dirToRemove in dirsToRemove:
        remove(dirToRemove)
    info("Finished.")


if __name__ == '__main__':
    main()