#! /usr/bin/env python """ Script for uploading cluster data. Data is organized in directories called as cluster numbers (from 1 to 17). Each cluster directory has 1 pdf file and 10 TIF image files. There is also a text file that contains the list of genes contained in each cluster. """ import os, re, glob, shutil from time import * from datetime import * from java.io import File from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageDataConfig, ImageMetadata, Location from ch.systemsx.cisd.openbis.plugin.screening.shared.api.v1.dto import Geometry from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageContainerDataConfig, ChannelColor from loci.formats import ImageReader print "###################################################" tz=localtime()[3]-gmtime()[3] d=datetime.now() print d.strftime("%Y-%m-%d %H:%M:%S GMT"+"%+.2d" % tz+":00") def process(transaction): incoming = transaction.getIncoming() clusterName = os.path.basename(incoming.getPath()) #Check if project and experiment called "CLUSTERS" already exist, if not create them project = transaction.getProject("/SINERGIA/CLUSTERS") exp = transaction.getExperiment("/SINERGIA/CLUSTERS/CLUSTERS") if not project: project = transaction.createNewProject("/SINERGIA/CLUSTERS") if not exp: exp = transaction.createNewExperiment("/SINERGIA/CLUSTERS/CLUSTERS", 'SIRNA_HCS') exp.setPropertyValue("DESCRIPTION", "gene clusters") #create samples called Cluster1 to 17. Take the name from the directory (in the incoming folder there are 17 directories called cluster1-17) newClusterSample = transaction.getSample("/SINERGIA/" + clusterName) if not newClusterSample: newClusterSample=transaction.createNewSample("/SINERGIA/" + clusterName,'CLUSTER') newClusterSample.setExperiment(exp) #upload the pdf image of each cluster as dataset in the corresponding Cluster sample for pdf in glob.glob(os.path.join(incoming.getPath(), '*.pdf')): dataSetPDF = transaction.createNewDataSet() dataSetPDF.setDataSetType("PDF") dataSetPDF.setSample(newClusterSample) transaction.moveFile(pdf, dataSetPDF) # upload the 10 tif images for each cluster as a dataset in the corresponding Cluster sample tifDir = incoming.getPath() + "/tiffs" if not os.path.exists(tifDir): os.makedirs(tifDir) if glob.glob(os.path.join(incoming.getPath(), '*.tif')): for tif in glob.glob(os.path.join(incoming.getPath(), '*.tif')): shutil.move(tif, tifDir) dataSetTIF = transaction.createNewDataSet() dataSetTIF.setDataSetType("TIF_IMAGES") dataSetTIF.setSample(newClusterSample) transaction.moveFile(tifDir, dataSetTIF) # upload the 10 png images for each cluster as a dataset in the corresponding Cluster sample pngDir = incoming.getPath() + "/pngs" if not os.path.exists(pngDir): os.makedirs(pngDir) if glob.glob(os.path.join(incoming.getPath(), '*.png')): for png in glob.glob(os.path.join(incoming.getPath(), '*.png')): shutil.move(png, pngDir) dataSetPNG = transaction.createNewDataSet() dataSetPNG.setDataSetType("PNG_IMAGES") dataSetPNG.setSample(newClusterSample) transaction.moveFile(pngDir, dataSetPNG) # Open the geneList text file and create samples with the name of the genes. These samples have to be contained in the corresponding cluster. for textfile in glob.glob(os.path.join(incoming.getPath(), 'geneList.txt')): text = open(textfile, "r") lineIndex =0 for line in text: lineIndex=lineIndex+1 gene_list = re.split(r"[,]",line) gene_list = [ item.strip() for item in gene_list ] gene_list = filter(lambda x: len(x) > 0, gene_list) for gene in gene_list: print gene newGeneSample = transaction.createNewSample("/SINERGIA/" + gene,'GENE') newGeneSample.setContainer(newClusterSample) newGeneSample.setExperiment(exp) ################################################################################################################################### #This part of the script assumes that the gene directories are inside the cluster directories. # #in each Cluster directory there are subdirectories for each gene. Now we create a sample for each gene and set the cluster it belongs to as a container sample. # #Each gene has a pdf file and 10 movies, so tehy will be uploaded as datasets # if not glob.glob(os.path.join(incoming.getPath(), 'tiffs')): # for genes, pdfGene in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.pdf'))): # geneName = os.path.basename(genes) # newGeneSample = transaction.createNewSample("/SINERGIA/" + geneName,'GENE') # newGeneSample.setContainer(newClusterSample) # newGeneSample.setExperiment(exp) # videoDir = genes + "/videos" # if not os.path.exists(videoDir): # os.makedirs(videoDir) # dataSetpdfGene = transaction.createNewDataSet() # dataSetpdfGene.setDataSetType("PDF") # dataSetpdfGene.setSample(newGeneSample) # transaction.moveFile(pdfGene, dataSetpdfGene) # # for genes, mp4 in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.mp4'))): # shutil.move(mp4, videoDir) # dataSetMP4Gene = transaction.createNewDataSet() # dataSetMP4Gene.setDataSetType("VIDEOS") # dataSetMP4Gene.setSample(newGeneSample) # transaction.moveFile(videoDir, dataSetMP4Gene) # #