5 from htsworkflow.util import queuecommands
7 LOGGER = logging.getLogger(__name__)
def pathname_to_run_name(base):
    """
    Convert a pathname to a base runfolder name
    handle the case with a trailing /

    >>> pathname_to_run_name("/a/b/c/run")
    'run'
    >>> pathname_to_run_name("/a/b/c/run/")
    'run'
    >>> pathname_to_run_name("run")
    'run'
    >>> pathname_to_run_name("run/")
    'run'
    >>> pathname_to_run_name("../run")
    'run'
    >>> pathname_to_run_name("../run/")
    'run'
    >>> pathname_to_run_name("/")
    ''

    base - path to a runfolder, with or without a trailing separator

    Returns the last non-empty path component, or an empty string when
    the path has no such component (for example "" or "/").
    """
    name = ""
    while not name:
        head, name = os.path.split(base)
        # Stop when nothing is left to split, or when splitting no longer
        # shortens the path: os.path.split("/") returns ("/", ""), so without
        # the second test a root path would never leave this loop.
        if not head or head == base:
            break
        base = head
    return name
def make_srf_commands(run_name, bustard_dir, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
    """
    make a list of shell command lines that convert each lane to an srf file

    generates files like:
    woldlab_080514_HWI-EAS229_0029_20768AAXX_8.srf
     site        run name                    lane

    run_name - most of the file name (run folder name is a good choice)
    bustard_dir - not used when building the commands; the sequence file
                  patterns are relative
                  NOTE(review): presumably the commands are meant to be run
                  from inside the bustard directory - confirm with callers
    lanes - list of integers corresponding to which lanes to process
    site_name - name of your "sequencing site" or "Individual"
    destdir - where to write all the srf files
    cmdlevel - which converter to use: SOLEXA2SRF, ILLUMINA2SRF10 or
               ILLUMINA2SRF11

    Returns one command string per lane (arguments joined with spaces).
    Raises ValueError when cmdlevel is not one of the known levels.
    """
    LOGGER.info("run_name %s" % (run_name,))

    # The destination directory does not depend on the lane; normalise once.
    destdir = os.path.normpath(destdir)

    cmd_list = []
    for lane in lanes:
        destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
        dest_path = os.path.join(destdir, destname)

        if cmdlevel == SOLEXA2SRF:
            # NOTE(review): confirm this option list against the solexa2srf
            # documentation before relying on it.
            name_prefix = '%s_%%l_' % (run_name,)
            seq_pattern = 's_%d_*_seq.txt' % (lane,)
            cmd = ['solexa2srf',
                   '-N', name_prefix,
                   '-n', '%t:%3x:%3y',
                   '-o', dest_path,
                   seq_pattern]
        elif cmdlevel == ILLUMINA2SRF10:
            # NOTE(review): confirm the '-v1.0' switch against the
            # illumina2srf documentation.
            seq_pattern = 's_%d_*_seq.txt' % (lane,)
            cmd = ['illumina2srf',
                   '-v1.0',
                   '-o', dest_path,
                   seq_pattern]
        elif cmdlevel == ILLUMINA2SRF11:
            # This level reads qseq files rather than seq files.
            seq_pattern = 's_%d_*_qseq.txt' % (lane,)
            cmd = ['illumina2srf',
                   '-o', dest_path,
                   seq_pattern]
        else:
            # %s rather than %d: a non-integer level must still surface as
            # ValueError instead of a TypeError from the string formatting.
            raise ValueError("Unrecognized run level %s" % (cmdlevel,))

        # The glob pattern is left unquoted on purpose so that the shell
        # running the command expands it.
        cmd_line = " ".join(cmd)
        LOGGER.info("Generated command: " + cmd_line)
        cmd_list.append(cmd_line)
    return cmd_list
def create_qseq_patterns(bustard_dir):
    """
    Scan a bustard directory for qseq files and determine a glob pattern

    bustard_dir - directory that is searched for *_qseq.txt files

    Returns a list of (read, pattern) tuples.  Each pattern still contains
    a %d placeholder for the lane number.  When the file names have four
    '_' separated fields the result is [(None, pattern)]; when they have
    five fields there is one (read number, pattern) entry per read, in
    ascending read order.

    Raises RuntimeError when no qseq files are found, or when the file
    names have neither four nor five '_' separated fields.
    """
    # grab one tile for each lane.
    qseqs = glob(os.path.join(bustard_dir, '*_1101_qseq.txt'))
    # handle old runfolders
    if not qseqs:
        qseqs = glob(os.path.join(bustard_dir, '*_0001_qseq.txt'))
    if not qseqs:
        # Fail with a clear message instead of an IndexError further down.
        raise RuntimeError('no qseq files found in %s' % (bustard_dir,))

    qseqs = [os.path.basename(x) for x in qseqs]
    field_count = len(qseqs[0].split('_'))
    if field_count == 4:
        # four fields: no read number in the file name
        return [(None, "s_%d_[0-9][0-9][0-9][0-9]_qseq.txt")]
    elif field_count == 5:
        # five fields: the third one is the read number
        # build a dictionary of read numbers by lane
        # ( just in case we didn't run all 8 lanes )
        lanes = {}
        for q in qseqs:
            sample, lane, read, tile, extension = q.split('_')
            lanes.setdefault(lane, []).append(read)

        # grab a lane from the dictionary
        # I don't think it matters which one, but always taking the lowest
        # key keeps the result independent of glob and dictionary ordering.
        reads = sorted(set(int(read) for read in lanes[min(lanes)]))

        # build the list of patterns; %%d survives as the lane placeholder
        return [(read, 's_%%d_%d_[0-9][0-9][0-9][0-9]_qseq.txt' % (read,))
                for read in reads]
    else:
        raise RuntimeError('unrecognized qseq pattern, not a single or multiple read pattern')
def make_qseq_commands(run_name, bustard_dir, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
    """
    make a list of shell command lines that tar up the qseq files of each lane

    generates files like:
    woldlab_080514_HWI-EAS229_0029_20768AAXX_l8.tar.bz2
    woldlab_080514_HWI-EAS229_0029_20768AAXX_l8_r1.tar.bz2
     site        run name                    lane / read

    run_name - most of the file name (run folder name is a good choice)
    bustard_dir - directory scanned by create_qseq_patterns to work out
                  how the qseq files are named
    lanes - list of integers corresponding to which lanes to process
    site_name - name of your "sequencing site" or "Individual"
    destdir - where to write all the archives
    cmdlevel - not used when building the commands

    Returns one 'tar cjf' command string per lane and read.  The file
    patterns inside the commands are relative.
    NOTE(review): presumably the commands are meant to be run from inside
    bustard_dir (see run_commands) - confirm with callers.
    """
    LOGGER.info("run_name %s" % (run_name,))

    lanes = list(lanes)
    if not lanes:
        # Nothing to archive, so the bustard directory is not scanned at all.
        return []

    # Neither of these depends on the lane, so compute them once rather than
    # re-scanning the directory for every lane.
    destdir = os.path.normpath(destdir)
    qseq_patterns = create_qseq_patterns(bustard_dir)

    cmd_list = []
    for lane in lanes:
        for read, pattern in qseq_patterns:
            if read is None:
                # no read number in the qseq file names
                destname = '%s_%s_l%d.tar.bz2' % (site_name, run_name, lane)
            else:
                destname = '%s_%s_l%d_r%d.tar.bz2' % (site_name, run_name, lane, read)
            dest_path = os.path.join(destdir, destname)

            # The pattern stays unquoted so the shell expands it.
            cmd = " ".join(['tar', 'cjf', dest_path, pattern % (lane,)])
            LOGGER.info("Generated command: " + cmd)
            cmd_list.append(cmd)
    return cmd_list
def run_commands(new_dir, cmd_list, num_jobs):
    """
    Run a list of commands through a QueueCommands queue from inside new_dir

    new_dir - directory to change into before the commands are queued
    cmd_list - commands handed to queuecommands.QueueCommands
    num_jobs - second argument to queuecommands.QueueCommands
               NOTE(review): presumably the number of commands allowed to
               run at the same time - confirm against QueueCommands

    The working directory that was current on entry is restored before
    returning, including when the queue raises.
    """
    LOGGER.info("chdir to %s" % (new_dir,))
    curdir = os.getcwd()
    os.chdir(new_dir)
    try:
        q = queuecommands.QueueCommands(cmd_list, num_jobs)
        # NOTE(review): assumes QueueCommands exposes run() to execute the
        # queue - confirm against htsworkflow.util.queuecommands.
        q.run()
    finally:
        # Do not leave the process stranded in new_dir after a failure.
        os.chdir(curdir)
def make_md5_commands(destdir):
    """
    Scan the destination dir and create md5sum commands for its contents

    destdir - directory to scan; it is made absolute first, so the
              generated commands do not depend on the working directory

    Returns a list of 'md5sum FILE > FILE.md5' command strings: first the
    *.bz2 files, then the *gz files, then the *.srf files, each group in
    sorted order.
    """
    destdir = os.path.abspath(destdir)

    cmd_list = []
    # NOTE(review): '*gz' has no dot, so it matches every name ending in
    # "gz" (for example .tgz as well as .gz) - confirm that this is intended.
    for pattern in ("*.bz2", "*gz", "*.srf"):
        # glob returns names in arbitrary order; sorting makes the generated
        # command list reproducible between runs.
        for f in sorted(glob(os.path.join(destdir, pattern))):
            # File names go into the command line as is (unquoted).
            cmd = " ".join(['md5sum', f, '>', f + '.md5'])
            LOGGER.info('generated command: ' + cmd)
            cmd_list.append(cmd)
    return cmd_list