Wednesday, May 9, 2012

Converting a directory tree to an XML object: test with the MFISH dataset

Once the MFISH dataset has been properly reordered, an XML object representing the directory hierarchy is built.
The MFISH folder contains three subfolders: ASI, PSI and Vysis. Let's build an XML file called ASI. Find line 51 in the following Python script and modify it to match the path to the ASI folder, for example:

topdir = '/home/user/MFISH/ASI'

then run the script:

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 31 13:30:22 2012

@author: Jean-Patrick Pommier 
    with a huge help of Mont29:
    http://www.developpez.net/forums/u238430/mont29/
"""
import lxml.etree as et
import os,re

def filtreImages(files):
    """Return the file names that carry a ``.tif`` or ``.TIF`` extension.

    Args:
        files: iterable of file names (plain strings), typically the
            ``files`` list yielded by ``os.walk``.

    Returns:
        A new list with the matching names, in their original order.
        An empty input yields an empty list.
    """
    # The dot is escaped so that only a real extension matches; with a bare
    # '.' any name merely ending in "tif" (e.g. "xtif") would be accepted.
    # Only the all-lowercase and all-uppercase spellings are recognised.
    regex = re.compile(r'\.(TIF|tif)$')
    return [f for f in files if regex.search(f) is not None]

def makeNodes(node_dirs, root, leveldirlist, root_level, files):
    """Attach XML children for one directory to its parent element.

    For every sub-directory in *leveldirlist* an element is created whose
    tag depends on *root_level*; for every name in *files* an ``image``
    element is created. Each new element is appended to the element
    registered for *root* and is itself registered in *node_dirs* under its
    full path.

    Args:
        node_dirs: dict mapping a path to its XML element. It must already
            contain an entry for *root*; it is updated in place.
        root: path of the directory being processed.
        leveldirlist: names of the sub-directories of *root*.
        root_level: depth of *root* below the top directory
            (0 -> 'slide', 1 -> 'field', 2 -> 'fluorochrome',
            3 or more -> 'TooDeep').
        files: names of the image files found directly in *root*.

    Returns:
        None. The tree is modified through *node_dirs*.
    """
    code = {0: 'slide', 1: 'field', 2: 'fluorochrome', 3: 'TooDeep'}
    # Any depth beyond the known levels falls back to 'TooDeep' instead of
    # raising KeyError on an unexpectedly deep tree.
    tag = code.get(root_level, 'TooDeep')

    for d in leveldirlist:
        child = et.Element(tag, name=d)
        if tag == 'field':
            # Literal default attribute values, as written by the original
            # script (apparently placeholders to be filled in later).
            child.set('ROI', 'x0,y0,x1,y1')
            child.set('position', '-1,-1')
        # Use the dict passed as argument, not a global of a similar name.
        node_dirs[os.path.join(root, d)] = child
        node_dirs[root].append(child)

    # files is independent of leveldirlist, so it gets its own loop; an
    # empty list simply results in no iteration.
    for image in files:
        child = et.Element("image", name=image, exposure='0.0')
        # Key on the image name (not on a directory name) so the nodes
        # registered in the previous loop are not overwritten.
        node_dirs[os.path.join(root, image)] = child
        node_dirs[root].append(child)
                  
level = {}
if __name__ == '__main__':
    # Top directory to scan: edit this path to point to your own folder.
    # normpath drops a trailing separator, which would otherwise shift the
    # depth computed below by one level.
    topdir = os.path.normpath('/home/simon/MFISH/ASI')
    projetxml = et.Element('CytoGenet')  # root element of the XML tree
    # Maps each visited path to its XML element; seeded with the root.
    nodes_dirs = {topdir: projetxml}
    ln_root = len(topdir)

    for root, dirs, files in os.walk(topdir):
        # Depth of root below topdir = number of separators in the part of
        # the path that follows topdir.
        lvl = root[ln_root:].count(os.path.sep)
        makeNodes(nodes_dirs, root, dirs, lvl, filtreImages(files))

    # encoding='unicode' yields text rather than bytes, so the output is
    # readable under both Python 2 and Python 3.
    print(et.tostring(projetxml, pretty_print=True, encoding='unicode'))

    # Quick inspection of the first two children of the root. The guards
    # avoid an IndexError when the scanned tree has fewer than two entries.
    slides = list(projetxml)
    if slides:
        print(slides[0].get("name"))
        print(len(slides[0]))
    if len(slides) > 1:
        print(slides[1].get("name"))
        for m in slides[1]:
            print(m.get("name"))
    
   
The script just displays the XML object:

<ASI>
  <slide name="A32">
    <field name="01" ROI="x0,y0,x1,y1" position="-1,-1">
      <fluorochrome name="SpGreen">
        <image name="A3201EXG.tif" exposure="0.0"/>
      </fluorochrome>
      <fluorochrome name="SpOrange">
        <image name="A3201EXO.tif" exposure="0.0"/>
      </fluorochrome>
      <fluorochrome name="TexasRed">
        <image name="A3201EXT.tif" exposure="0.0"/>
      </fluorochrome>
      <fluorochrome name="Cy5-5">
        <image name="A3201EX5.tif" exposure="0.0"/>
      </fluorochrome>
      <fluorochrome name="DAPI">
        <image name="A3201EXD.tif" exposure="0.0"/>
      </fluorochrome>
      <fluorochrome name="Cy5">
        <image name="A3201EXC.tif" exposure="0.0"/>
      </fluorochrome>
    </field>