Older RDF library doesn't like Unicode strings as nodes
author Diane Trout <diane@caltech.edu>
Fri, 31 Aug 2012 21:39:45 +0000 (14:39 -0700)
committer Diane Trout <diane@caltech.edu>
Fri, 31 Aug 2012 21:39:45 +0000 (14:39 -0700)
so manually encode them as UTF-8

htsworkflow/pipelines/sequences.py
htsworkflow/submission/condorfastq.py

index 462c03460df700650aa82d81f0fff0c9fd9bd6b4..5baf1b5022a948b1022925737bc79cd3e425aafd 100644 (file)
@@ -161,7 +161,9 @@ class SequenceFile(object):
                 model.add_statement(RDF.Statement(s, p, toTypedNode(o)))
         def add(model, s, p, o):
             model.add_statement(RDF.Statement(s,p,o))
-        fileNode = RDF.Node(RDF.Uri('file://' + os.path.abspath(self.path)))
+        # a bit unreliable... assumes filesystem is encoded in utf-8
+        path = os.path.abspath(self.path.encode('utf-8'))
+        fileNode = RDF.Node(RDF.Uri('file://' + path))
         add(model, fileNode, rdfNS['type'], libNS['raw_file'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
         add_lit(model, fileNode, libNS['lane_number'], self.lane)
index 5ed9243096b9061c5ec789651fca472d0268fdf9..71b7090d4375d29834a56a632970e8923e81d051 100644 (file)
@@ -157,7 +157,8 @@ class CondorFastqExtract(object):
 
     def import_libraries(self, result_map):
         for lib_id in result_map.keys():
-            liburl = urljoin(self.host, 'library/%s/' % (lib_id,))
+            lib_id_encoded = lib_id.encode('utf-8')
+            liburl = urljoin(self.host, 'library/%s/' % (lib_id_encoded,))
             library = RDF.Node(RDF.Uri(liburl))
             self.import_library(library)