projects
/
htsworkflow.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
65c4fae
)
Read the first sequence out of the fastq to find the read_length
author
Diane Trout
<diane@caltech.edu>
Mon, 30 Nov 2015 21:47:33 +0000
(13:47 -0800)
committer
Diane Trout
<diane@caltech.edu>
Mon, 30 Nov 2015 21:47:33 +0000
(13:47 -0800)
For some reason we had a flowcell where the sequencer thought
there were 66 cycles but the file only had 50.
htsworkflow/submission/submission.py
patch
|
blob
|
history
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py
index b13138ac047109332a9a4dd9de0fbe4f0fbf9ad9..a27391d6e1cd3ac8c9c542b94d089f042b9fa48c 100644
(file)
--- a/htsworkflow/submission/submission.py
+++ b/htsworkflow/submission/submission.py
@@ -20,6 +20,7 @@
from htsworkflow.submission.daf import \
MetadataLookupException, \
ModelException, \
get_submission_uri
MetadataLookupException, \
ModelException, \
get_submission_uri
+from htsworkflow.util import opener
from django.conf import settings
from django.template import Context, Template, loader
from django.conf import settings
from django.template import Context, Template, loader
@@ -125,6 +126,7 @@
class Submission(object):
fileNode = self.make_file_node(pathname, an_analysis)
self.add_md5s(filename, fileNode, analysis_dir)
self.add_file_size(filename, fileNode, analysis_dir)
fileNode = self.make_file_node(pathname, an_analysis)
self.add_md5s(filename, fileNode, analysis_dir)
self.add_file_size(filename, fileNode, analysis_dir)
+ self.add_read_length(filename, fileNode, analysis_dir)
self.add_fastq_metadata(filename, fileNode)
self.add_label(file_type, fileNode, libNode)
self.model.add_statement(
self.add_fastq_metadata(filename, fileNode)
self.add_label(file_type, fileNode, libNode)
self.model.add_statement(
@@ -177,6 +179,19 @@
class Submission(object):
self.model.add_statement(
RDF.Statement(fileNode, dafTermOntology['file_size'], toTypedNode(file_size)))
self.model.add_statement(
RDF.Statement(fileNode, dafTermOntology['file_size'], toTypedNode(file_size)))
+ def add_read_length(self, filename, fileNode, analysis_dir):
+ submission_pathname = os.path.join(analysis_dir, filename)
+ stream = opener.autoopen(submission_pathname, 'rt')
+ header = stream.readline().strip()
+ sequence = stream.readline().strip()
+ read_length = len(sequence)
+ self.model.add_statement(
+ RDF.Statement(fileNode,
+ libraryOntology['read_length'],
+ toTypedNode(read_length))
+ )
+ LOGGER.debug("Updating read length: %d", read_length)
+
def add_fastq_metadata(self, filename, fileNode):
# How should I detect if this is actually a fastq file?
try:
def add_fastq_metadata(self, filename, fileNode):
# How should I detect if this is actually a fastq file?
try:
@@ -189,7 +204,7 @@
class Submission(object):
('lib_id', libraryOntology['library_id']),
('lane', libraryOntology['lane_number']),
('read', libraryOntology['read']),
('lib_id', libraryOntology['library_id']),
('lane', libraryOntology['lane_number']),
('read', libraryOntology['read']),
- ('cycle', libraryOntology['read_length'])]
+ ]
for file_term, model_term in terms:
value = fqname.get(file_term)
if value is not None:
for file_term, model_term in terms:
value = fqname.get(file_term)
if value is not None: