#! /usr/bin/env python # This is an example Jython dropbox for importing HCS image datasets import os import shutil import random import ch.systemsx.cisd.openbis.generic.shared.basic.dto as dto from ch.systemsx.cisd.openbis.generic.shared.basic.dto import SampleType, NewSample from ch.systemsx.cisd.openbis.generic.shared.dto.identifier import SampleIdentifier from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import * from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser from java.io import File from ch.systemsx.cisd.common.fileconverter import FileConverter, Tiff2PngConversionStrategy from ch.systemsx.cisd.common.mail import From from ch.systemsx.cisd.openbis.generic.shared.dto import NewProperty # ------------ # Dropbox specific image dataset registration. You may want to modify this part. # ------------ """ type of the new image dataset """ IMAGE_DATASET_TYPE = "HCS_IMAGE" """ file format code of files in a new image dataset """ IMAGE_FILE_FORMAT = "TIFF" """ type of the new analysis dataset """ ANALYSIS_DATASET_TYPE = "HCS_IMAGE_ANALYSIS_DATA" """ file format of the analysis dataset """ ANALYSIS_FILE_FORMAT = "CSV" """ type of the new image overlay dataset """ OVERLAY_IMAGE_DATASET_TYPE = "HCS_IMAGE_SEGMENTATION_OVERLAY" """ file format of the image overlay dataset """ OVERLAY_IMAGE_FILE_FORMAT = "PNG" """ space where the plate for which the dataset has been acquired exist """ PLATE_SPACE = "DEMO" """ only files with these extensions will be recognized as images """ RECOGNIZED_IMAGES_EXTENSIONS = ["tiff", "tif", "png", "gif", "jpg", "jpeg"] """ should thumbnails be generated? """ GENERATE_THUMBNAILS = True """ the maximal width and height of the generated thumbnails """ MAX_THUMNAIL_WIDTH_AND_HEIGHT = 256 """ number of threads that are used for thumbnail generation will be equal to: this constant * number of processor cores """ ALLOWED_MACHINE_LOAD_DURING_THUMBNAIL_GENERATION = 1.0 """ should all dataset in one experiment use the same channels? """ STORE_CHANNELS_ON_EXPERIMENT_LEVEL = False """ should the original data be stored in the original form or should we pack them into one container? """ ORIGINAL_DATA_STORAGE_FORMAT = OriginalDataStorageFormat.UNCHANGED # --------- """ name of the color which should be treated as transparent in overlays """ OVERLAYS_TRANSPARENT_COLOR = "black" """ sample type code of the plate, needed if a new sample is registered automatically """ PLATE_TYPE_CODE = "PLATE" """ project and experiment where new plates will be registered """ DEFAULT_PROJECT_CODE = "TEST" DEFAULT_EXPERIMENT_CODE = "SANOFI" PLATE_GEOMETRY_PROPERTY_CODE = "$PLATE_GEOMETRY" PLATE_GEOMETRY = "384_WELLS_16X24" ANALYSIS_RUN_PROPERTY_CODE = "ANALYSIS_RUN" # --------- """ extracts code of the sample from the directory name """ def extract_sample_code(incoming_name): file_basename = extract_file_basename(incoming_name) code = file_basename.split("_")[1] if code == "": code = file_basename return code """ For a given tile number and tiles geometry returns a (x,y) tuple which describes where the tile is located on the well. """ def get_tile_coords(tile_num, tile_geometry): columns = tile_geometry[1] row = ((tile_num - 1) / columns) + 1 col = ((tile_num - 1) % columns) + 1 return (row, col) """ Parameters: image_tokens_list - list of ImageTokens Returns: (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well """ def get_tile_geometry(image_tokens_list): max_tile = get_max_tile_number(image_tokens_list) if max_tile % 4 == 0 and max_tile != 4: return (max_tile / 4, 4) elif max_tile % 3 == 0: return (max_tile / 3, 3) elif max_tile % 2 == 0: return (max_tile / 2, 2) else: return (max_tile, 1) """ Creates ImageFileInfo for a given ImageTokens. Converts tile number to coordinates on the 'well matrix'. Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif Returns: ImageTokens """ def create_image_tokens(path): image_tokens = ImageTokens() image_tokens.path = path basename = os.path.splitext(path)[0] wellText = basename[0:find(basename, "(")] # A - 1 image_tokens.well = wellText.replace(" - ", "") if " wv " in basename: fieldText = basename[find(basename, "fld ") + 4 : find(basename, " wv")] image_tokens.channel = basename[rfind(basename, " - ") + 3 :-1] else: fieldText = basename[find(basename, "fld ") + 4 : find(basename, ")")] image_tokens.channel = "DEFAULT" try: image_tokens.tile = int(fieldText) #print "image_tokens.tile", image_tokens.tile except ValueError: raise Exception("Cannot parse field number from '" + fieldText + "' in '" + basename + "' file name.") return image_tokens # ------------ # END of the part which you will probably need to modify # ------------ # ------------ # Generic utility # ------------ """ Finds first occurence of the patter from the right. Throws exception if the pattern cannot be found. """ def rfind(text, pattern): ix = text.rfind(pattern) ensurePatternFound(ix, text, pattern) return ix """ Finds first occurence of the patter from the left. Throws exception if the pattern cannot be found. """ def find(text, pattern): ix = text.find(pattern) ensurePatternFound(ix, text, pattern) return ix def ensurePatternFound(ix, file, pattern): if ix == -1: raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'") """ Returns: name of the file without the extension """ def extract_file_basename(filename): base_with_ext = os.path.split(filename)[1] if os.path.isfile(base_with_ext) : return os.path.splitext(base_with_ext)[0] else: return base_with_ext """ Returns: extension of the file """ def get_file_ext(file): return os.path.splitext(file)[1][1:].lower() """ Returns: java.io.File - first file with the specified extension or None if no file matches """ def find_file_by_ext(incoming_file, expected_ext): if not incoming_file.isDirectory(): return None incoming_path = incoming_file.getPath() for file in os.listdir(incoming_path): ext = get_file_ext(file) if ext.upper() == expected_ext.upper(): return File(incoming_path, file) return None """ Returns: java.io.File - subdirectory which contains the specified marker in the name """ def find_dir(incoming_file, dir_name_marker): if not incoming_file.isDirectory(): return None incoming_path = incoming_file.getPath() for file in os.listdir(incoming_path): if dir_name_marker.upper() in file.upper(): return File(incoming_path, file) return None # ------------ # Image dataset registration # ------------ """ Auxiliary function to extract all channel codes used by specified images. The channel label will be equal to channel code. Parameters: images - list of ImageFileInfo Returns: list of Channel """ def get_available_channels(images): channel_codes = {} for image in images: channel_codes[image.getChannelCode()] = 1 channels = [] for channelCode in channel_codes.keys(): channels.append(Channel(channelCode, channelCode)) return channels """ Parameters: dataset - BasicDataSetInformation registration_details - DataSetRegistrationDetails """ def set_dataset_details(dataset, registration_details): registration_details.setDataSetInformation(dataset) registration_details.setFileFormatType(dataset.getFileFormatTypeCode()) registration_details.setDataSetType(dataset.getDataSetType()) registration_details.setMeasuredData(dataset.isMeasured()) def set_image_dataset_storage_config(image_dataset): config = ImageStorageConfiguraton.createDefault() config.setStoreChannelsOnExperimentLevel(STORE_CHANNELS_ON_EXPERIMENT_LEVEL) config.setOriginalDataStorageFormat(ORIGINAL_DATA_STORAGE_FORMAT) if GENERATE_THUMBNAILS: thumbnailsStorageFormat = ThumbnailsStorageFormat() thumbnailsStorageFormat.setAllowedMachineLoadDuringGeneration(ALLOWED_MACHINE_LOAD_DURING_THUMBNAIL_GENERATION) thumbnailsStorageFormat.setMaxWidth(MAX_THUMNAIL_WIDTH_AND_HEIGHT) thumbnailsStorageFormat.setMaxHeight(MAX_THUMNAIL_WIDTH_AND_HEIGHT) config.setThumbnailsStorageFormat(thumbnailsStorageFormat) image_dataset.setImageStorageConfiguraton(config) """ Parameters: dataset - BasicDataSetInformation Returns: DataSetRegistrationDetails """ def create_image_dataset_details(incoming): registration_details = factory.createImageRegistrationDetails() image_dataset = registration_details.getDataSetInformation() set_image_dataset(incoming, image_dataset) set_image_dataset_storage_config(image_dataset) set_dataset_details(image_dataset, registration_details) return registration_details """ Returns: integer - maximal tile number """ def get_max_tile_number(image_tokens_list): max_tile = 0 for image_tokens in image_tokens_list: max_tile = max(max_tile, image_tokens.tile) return max_tile """ Auxiliary structure to store tokens of the image file name. """ class ImageTokens: # channel code channel = None # tile number tile = -1 # path to the image path = "" # well code, e.g. A1 well = "" """ Creates ImageFileInfo for a given path to an image Example of the accepted file name: A - 1(fld 1 wv Cy5 - Cy5).tif Returns: ImageFileInfo """ def create_image_info(image_tokens, tile_geometry): tileCoords = get_tile_coords(image_tokens.tile, tile_geometry) img = ImageFileInfo(image_tokens.channel, tileCoords[0], tileCoords[1], image_tokens.path) img.setWell(image_tokens.well) return img """ Tokenizes file names of all images in the directory. Returns: list of ImageTokens """ def parse_image_tokens(dir, recognized_image_extensions): image_tokens_list = [] dir_path = dir.getPath() for file in os.listdir(dir_path): ext = get_file_ext(file) try: extIx = recognized_image_extensions.index(ext) # not reached if extension not found image_tokens = create_image_tokens(file) #print "tile", image_tokens.tile, "path", image_tokens.path, "well", image_tokens.well image_tokens_list.append(image_tokens) except ValueError: pass # extension not recognized return image_tokens_list """ Parameters: - image_tokens_list - list of ImageTokens for each image - tile_geometry - (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well Returns: list of ImageFileInfo """ def create_image_infos(image_tokens_list, tile_geometry): images = [] for image_tokens in image_tokens_list: image = create_image_info(image_tokens, tile_geometry) images.append(image) return images # --------------------- """ Extracts all images from the incoming directory. Parameters: incoming - java.io.File, folder with images dataset - ImageDataSetInformation where the result will be stored """ def set_image_dataset(incoming, dataset): dataset.setDatasetTypeCode(IMAGE_DATASET_TYPE) dataset.setFileFormatCode(IMAGE_FILE_FORMAT) sample_code = extract_sample_code(incoming.getName()) dataset.setSample(PLATE_SPACE, sample_code) dataset.setMeasured(True) image_tokens_list = parse_image_tokens(incoming, RECOGNIZED_IMAGES_EXTENSIONS) tile_geometry = get_tile_geometry(image_tokens_list) images = create_image_infos(image_tokens_list, tile_geometry) channels = get_available_channels(images) dataset.setImages(images) dataset.setChannels(channels) dataset.setTileGeometry(tile_geometry[0], tile_geometry[1]) return dataset """ Extracts all overlay images from the overlays_dir directory. Parameters: overlays_dir - java.io.File, folder with image_dataset - ImageDataSetInformation, image dataset to which the overlay dataset belongs img_dataset_code - string, code of the image dataset to which the overlay dataset belongs overlay_dataset - ImageDataSetInformation where the result will be stored extension - accepted image file extensions """ def set_overlay_dataset(overlays_dir, image_dataset, img_dataset_code, overlay_dataset, extension): overlay_dataset.setDatasetTypeCode(OVERLAY_IMAGE_DATASET_TYPE) overlay_dataset.setFileFormatCode(OVERLAY_IMAGE_FILE_FORMAT) overlay_dataset.setSample(image_dataset.getSpaceCode(), image_dataset.getSampleCode()) overlay_dataset.setMeasured(False) overlay_dataset.setParentDatasetCode(img_dataset_code) if extension == None: recognized_image_exts = RECOGNIZED_IMAGES_EXTENSIONS else: recognized_image_exts = [ extension ] image_tokens_list = parse_image_tokens(overlays_dir, recognized_image_exts) tile_geometry = (image_dataset.getTileRowsNumber(), image_dataset.getTileColumnsNumber()) images = create_image_infos(image_tokens_list, tile_geometry) channels = get_available_channels(images) overlay_dataset.setImages(images) overlay_dataset.setChannels(channels) overlay_dataset.setTileGeometry(tile_geometry[0], tile_geometry[1]) """ Creates registration details of the image overlays dataset. Parameters: overlays_dir - java.io.File, folder with image_dataset - ImageDataset, image dataset to which the overlay dataset belongs img_dataset_code - string, code of the image dataset to which the overlay dataset belongs Returns: DataSetRegistrationDetails """ def create_overlay_dataset_details(overlays_dir, image_dataset, img_dataset_code, extension): overlay_dataset_details = factory.createImageRegistrationDetails() overlay_dataset = overlay_dataset_details.getDataSetInformation() set_overlay_dataset(overlays_dir, image_dataset, img_dataset_code, overlay_dataset, extension) set_dataset_details(overlay_dataset, overlay_dataset_details) set_image_dataset_storage_config(overlay_dataset) config = overlay_dataset.getImageStorageConfiguraton() # channels will be connected to the dataset config.setStoreChannelsOnExperimentLevel(False) if GENERATE_THUMBNAILS: # overlay thumbnails should be generated with higher quality thumbnailsStorageFormat = config.getThumbnailsStorageFormat() thumbnailsStorageFormat.setHighQuality(True); config.setThumbnailsStorageFormat(thumbnailsStorageFormat) overlay_dataset.setImageStorageConfiguraton(config) return overlay_dataset_details # --------------------- """ Creates the analysis dataset description. The dataset will be connected to the specified sample and parent dataset. Parameters: dataset - BasicDataSetInformation where the result will be stored """ def set_analysis_dataset(sample_space, sample_code, parent_dataset_code, dataset): dataset.setDatasetTypeCode(ANALYSIS_DATASET_TYPE) dataset.setFileFormatCode(ANALYSIS_FILE_FORMAT) dataset.setSample(sample_space, sample_code) dataset.setMeasured(False) dataset.setParentDatasetCode(parent_dataset_code) """ Creates registration details of the analysis dataset. Returns: DataSetRegistrationDetails """ def create_analysis_dataset_details(sample_space, sample_code, parent_dataset_code, analysis_run): registration_details = factory.createBasicRegistrationDetails() dataset = registration_details.getDataSetInformation() set_analysis_dataset(sample_space, sample_code, parent_dataset_code, dataset) analysis_run_property = NewProperty(ANALYSIS_RUN_PROPERTY_CODE, analysis_run) dataset.setDataSetProperties([ analysis_run_property ]) set_dataset_details(dataset, registration_details) return registration_details """ registers sample if it does not exist already """ def register_sample_if_necessary(space_code, project_code, experiment_code, sample_code): openbis = state.getOpenBisService() sampleIdentifier = SampleIdentifier.create(space_code, sample_code) if (openbis.tryGetSampleWithExperiment(sampleIdentifier) == None): sample = NewSample() sampleType = SampleType() sampleType.setCode(PLATE_TYPE_CODE) sample.setSampleType(sampleType) sample.setIdentifier(sampleIdentifier.toString()) property = dto.VocabularyTermEntityProperty(); vocabularyTerm = dto.VocabularyTerm(); vocabularyTerm.setCode(PLATE_GEOMETRY); property.setVocabularyTerm(vocabularyTerm); propertyType = dto.PropertyType(); dataType = dto.DataType(); dataType.setCode(dto.DataTypeCode.CONTROLLEDVOCABULARY); propertyType.setDataType(dataType); propertyType.setCode(PLATE_GEOMETRY_PROPERTY_CODE); property.setPropertyType(propertyType); sample.setProperties([ property ]) sample.setExperimentIdentifier("/" + space_code + "/" + project_code + "/" + experiment_code) openbis.registerSample(sample, None) # --------------------- def debug(*msg): print "".join(msg) def convert_to_png(dir, transparent_color): delete_original_files = True strategy = Tiff2PngConversionStrategy(transparent_color, 0, delete_original_files) # Uses #cores * machineLoad threads for the conversion, but not more than maxThreads machineLoad = ALLOWED_MACHINE_LOAD_DURING_THUMBNAIL_GENERATION maxThreads = 100 errorMsg = FileConverter.performConversion(File(dir), strategy, machineLoad, maxThreads) if errorMsg != None: raise Exception("Error", errorMsg) def notify(plate_code): content = "Dear Mr./Mrs.\n" hostname = "http://bwdl27.bw.f2.enterprise:8080/openbis" plate_link = hostname+"?viewMode=simple#entity=SAMPLE&action=SEARCH&code="+plate_code+"&sample_type=PLATE" content += "Data for plate : " + plate_code + " has been registered : \n" + plate_link + "\n" content += "\n" content += "Have a nice day!\n" content += " openBIS\n" replyAddress = "Matthew.Smicker@sanofi-aventis.com" fromAddress = From("openbis@sanofi-aventis.com") recipients = [ "Matthew.Smicker@sanofi-aventis.com" ] state.mailClient.sendMessage("openBIS: registration finished - " + plate_code, content, replyAddress, fromAddress, recipients) """ Allows to recognize that the subdirectory of the incoming dataset directory contains overlay images. This text has to appear in the subdirectory name. """ OVERLAYS_DIR_PATTERN = "_ROITiff" def register_images_with_overlays_and_analysis(incoming): if not incoming.isDirectory(): return tr = service.transaction(incoming, factory) image_dataset_details = create_image_dataset_details(incoming) plate_code = image_dataset_details.getDataSetInformation().getSampleCode() space_code = image_dataset_details.getDataSetInformation().getSpaceCode() register_sample_if_necessary(space_code, DEFAULT_PROJECT_CODE, DEFAULT_EXPERIMENT_CODE, plate_code) # create the image data set and put everything in it initially image_data_set = tr.createNewDataSet(image_dataset_details) image_data_set_folder = tr.moveFile(incoming.getPath(), image_data_set) img_dataset_code = image_data_set.getDataSetCode() # move overlays folder overlays_dir = find_dir(File(image_data_set_folder), OVERLAYS_DIR_PATTERN) if overlays_dir != None: tr_overlays = service.transaction(overlays_dir, factory) convert_to_png(overlays_dir.getPath(), OVERLAYS_TRANSPARENT_COLOR) overlay_dataset_details = create_overlay_dataset_details(overlays_dir, image_dataset_details.getDataSetInformation(), img_dataset_code, "png") overlays_data_set = tr_overlays.createNewDataSet(overlay_dataset_details) tr_overlays.moveFile(overlays_dir.getPath(), overlays_data_set, "overlays") tr_overlays.commit() # transform and move analysis file analysis_file = find_file_by_ext(File(image_data_set_folder), "xml") if analysis_file != None: tr_analysis = service.transaction(analysis_file, factory) analysis_run = extract_file_basename(analysis_file.getName()) analysis_registration_details = create_analysis_dataset_details( space_code, plate_code, img_dataset_code, analysis_run) analysis_data_set = tr_analysis.createNewDataSet(analysis_registration_details) analysis_data_set_file = tr_analysis.createNewFile(analysis_data_set, analysis_file.getName()) GEExplorerImageAnalysisResultParser(analysis_file.getPath()).writeCSV(File(analysis_data_set_file)) tr_analysis.commit() service.commit() notify(plate_code) register_images_with_overlays_and_analysis(incoming)