Use htsworkflow ontology to validate various RDF-using components.
author Diane Trout <diane@caltech.edu>
Wed, 19 Sep 2012 23:10:57 +0000 (16:10 -0700)
committer Diane Trout <diane@caltech.edu>
Wed, 19 Sep 2012 23:10:57 +0000 (16:10 -0700)
Of course to use the ontology I had to make one first.
Unsurprisingly implementing it touched a bunch of code & templates.

I tried to be more consistent about using mixed-case names for
classes and lower_case names for properties.

There are some inconsistencies, like I use the terms notes & comments
in different areas. Also, should I be using my own terms or
do better at reusing more standard ontologies?

12 files changed:
encode_submission/test_encode_find.py
htsworkflow/frontend/experiments/tests.py
htsworkflow/frontend/templates/experiments/flowcell_detail.html
htsworkflow/frontend/templates/experiments/flowcell_header.html
htsworkflow/frontend/templates/experiments/flowcell_lane_detail.html
htsworkflow/frontend/templates/sample_header.html
htsworkflow/frontend/templates/samples/library_detail.html
htsworkflow/pipelines/sequences.py
htsworkflow/pipelines/test/test_sequences.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/test/test_condorfastq.py
htsworkflow/util/schemas/htsworkflow.turtle [new file with mode: 0644]

index cb6216711bb078caf9e880522b8f2c383ddfdf85..05108955a4d31927b98d395b290ef986a3163d88 100644 (file)
@@ -7,12 +7,19 @@ import RDF
 
 import encode_find
 from htsworkflow.submission.ucsc import submission_view_url
-from htsworkflow.util.rdfhelp import dump_model, get_model, fromTypedNode
+from htsworkflow.util.rdfhelp import add_default_schemas, \
+     dump_model, get_model, fromTypedNode
+from htsworkflow.util.rdfinfer import Infer
 
 SOURCE_PATH = os.path.split(os.path.abspath(__file__))[0]
 print SOURCE_PATH
 
 class TestEncodeFind(unittest.TestCase):
+    def setUp(self):
+        self.model = get_model()
+        add_default_schemas(self.model)
+        self.inference = Infer(self.model)
+
     def test_create_status_node_with_uri(self):
         subURL = submission_view_url('5136')
         submissionUri = RDF.Uri(subURL)
@@ -35,11 +42,10 @@ class TestEncodeFind(unittest.TestCase):
         test_file = os.path.join(SOURCE_PATH, 'testdata', '5136SubDetail.html')
         from lxml.html import parse
         tree = parse(test_file)
-        model = get_model()
-        dates = encode_find.get_creation_dates(model, subNode)
+        dates = encode_find.get_creation_dates(self.model, subNode)
         self.assertEqual(len(dates), 0)
-        encode_find.parse_submission_page(model, tree, subNode)
-        dates = encode_find.get_creation_dates(model, subNode)
+        encode_find.parse_submission_page(self.model, tree, subNode)
+        dates = encode_find.get_creation_dates(self.model, subNode)
         self.assertEqual(len(dates), 1)
         object_date = fromTypedNode(dates[0].object)
         self.assertEqual(object_date, datetime(2011,12,7,15,23,0))
@@ -58,43 +64,54 @@ class TestEncodeFind(unittest.TestCase):
     libns:total_unique_locations 5789938 .
 
 ''', 'http://jumpgate.caltech.edu/library/')
+        errmsgs = list(self.inference.run_validation())
+        self.assertEqual(len(errmsgs), 0)
         urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/lane/1232'))
         encode_find.delete_lane(model, urn)
         self.failUnlessEqual(len(model), 0)
 
     def test_delete_lane_with_mapping(self):
-        model = get_model()
+        ontology_size = len(self.model)
         parser = RDF.Parser(name='turtle')
-        parser.parse_string_into_model(model, '''@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+        parser.parse_string_into_model(self.model, '''@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
 @prefix : <http://www.w3.org/1999/xhtml> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
 
+<http://jumpgate.caltech.edu/flowcell/42JV5AAXX/> a libns:IlluminaFlowcell .
 <http://jumpgate.caltech.edu/lane/1232>
     libns:flowcell <http://jumpgate.caltech.edu/flowcell/42JV5AAXX/> ;
     libns:has_mappings _:bnode110110 ;
-    libns:total_unique_locations 5789938 .
+    libns:total_unique_locations 5789938 ;
+    a libns:IlluminaLane .
 
 _:bnode110110
+    a libns:MappedCount ;
     libns:mapped_to "newcontam_UK.fa"@en ;
     libns:reads 42473 .
 ''', 'http://jumpgate.caltech.edu/library/')
-        self.failUnlessEqual(len(model), 5)
+        errmsgs = list(self.inference.run_validation())
+        self.assertEqual(len(errmsgs), 0)
+        self.failUnlessEqual(len(self.model), 8 + ontology_size)
         urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/lane/1232'))
-        encode_find.delete_lane(model, urn)
-        self.failUnlessEqual(len(model), 0)
+        encode_find.delete_lane(self.model, urn)
+        self.failUnlessEqual(len(self.model), 1 + ontology_size)
+        # the flowcell triple wasn't deleted.
 
     def test_delete_library(self):
-        model = get_model()
         parser = RDF.Parser(name='turtle')
-        parser.parse_string_into_model(model, '''@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+        parser.parse_string_into_model(self.model, '''@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
 @prefix : <http://www.w3.org/1999/xhtml> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
 @prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
 
+<http://jumpgate.caltech.edu/flowcell/42JV5AAXX/> a libns:IlluminaFlowcell .
+<http://jumpgate.caltech.edu/flowcell/62WCKAAXX/> a libns:IlluminaFlowcell .
+
 <http://jumpgate.caltech.edu/lane/1232>
+    a libns:IlluminaLane ;
     libns:flowcell <http://jumpgate.caltech.edu/flowcell/42JV5AAXX/> ;
     libns:has_mappings _:bnode110110 ;
     libns:total_unique_locations 5789938 .
@@ -104,14 +121,16 @@ _:bnode110110
     libns:has_lane <http://jumpgate.caltech.edu/lane/1232> ;
     libns:library_id "11011"@en ;
     libns:library_type "None"@en ;
-    a "libns:library"@en ;
+    a libns:Library ;
     <http://www.w3.org/1999/xhtml/vocab#stylesheet> <http://jumpgate.caltech.edu/static/css/app.css>, <http://jumpgate.caltech.edu/static/css/data-browse-index.css> .
 
 _:bnode110110
+    a libns:MappedCount ;
     libns:mapped_to "newcontam_UK.fa"@en ;
     libns:reads 42473 .
 
 <http://jumpgate.caltech.edu/lane/1903>
+    a libns:IlluminaLane ;
     libns:flowcell <http://jumpgate.caltech.edu/flowcell/62WCKAAXX/> ;
     libns:has_mappings _:bnode120970 ;
     libns:total_unique_locations 39172114 .
@@ -120,22 +139,25 @@ _:bnode110110
     libns:has_lane <http://jumpgate.caltech.edu/lane/1903> ;
     libns:library_id "12097"@en ;
     libns:library_type "Paired End (non-multiplexed)"@en ;
-    a "libns:library"@en .
+    a libns:Library .
 
 _:bnode120970
+    a libns:MappedCount ;
     libns:mapped_to "newcontam_UK.fa"@en ;
     libns:reads 64 .
 ''', 'http://jumpgate.caltech.edu/library')
+        errmsgs = list(self.inference.run_validation())
+        self.assertEqual(len(errmsgs), 0)
         urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/library/11011/'))
-        encode_find.delete_library(model, urn)
+        encode_find.delete_library(self.model, urn)
         q = RDF.Statement(None, encode_find.libraryOntology['reads'], None)
-        stmts = list(model.find_statements(q))
+        stmts = list(self.model.find_statements(q))
         self.failUnlessEqual(len(stmts), 1)
         self.failUnlessEqual(fromTypedNode(stmts[0].object),
                              64)
 
         q = RDF.Statement(None, encode_find.libraryOntology['library_id'], None)
-        stmts = list(model.find_statements(q))
+        stmts = list(self.model.find_statements(q))
         self.failUnlessEqual(len(stmts), 1)
         self.failUnlessEqual(fromTypedNode(stmts[0].object),
                              '12097')
index a72888484d6abb07003ceaa474c2c74b4fbc3457..93e1f3d452c32a5c6b74bae709794e1340a8e96b 100644 (file)
@@ -281,7 +281,7 @@ class ExperimentsTestCases(TestCase):
         failed_fc_span = flowcell_spans[0]
         failed_fc_a = failed_fc_span.getparent()
         # make sure some of our RDF made it.
-        self.assertEqual(failed_fc_a.get('rel'), 'libns:flowcell')
+        self.assertEqual(failed_fc_a.get('typeof'), 'libns:IlluminaFlowcell')
         self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/')
         fc_response = self.client.get(failed_fc_a.get('href'))
         self.assertEqual(fc_response.status_code, 200)
@@ -441,7 +441,7 @@ class ExperimentsTestCases(TestCase):
 
         select ?flowcell ?flowcell_id ?lane_id ?library_id
         where {
-          ?flowcell a libns:illumina_flowcell ;
+          ?flowcell a libns:IlluminaFlowcell ;
                     libns:flowcell_id ?flowcell_id ;
                     libns:has_lane ?lane .
           ?lane libns:lane_number ?lane_id ;
@@ -590,20 +590,23 @@ class TestSequencer(TestCase):
     def test_rdf(self):
         response = self.client.get('/flowcell/FC12150/', apidata)
         tree = fromstring(response.content)
-        divs = tree.xpath('//div[@rel="libns:sequenced_by"]',
-                          namespaces=NSMAP)
-        self.assertEqual(len(divs), 1)
-        self.assertEqual(divs[0].attrib['rel'], 'libns:sequenced_by')
-        self.assertEqual(divs[0].attrib['resource'], '/sequencer/2')
-
-        name = divs[0].xpath('./span[@property="libns:sequencer_name"]')
+        seq_by = tree.xpath('//div[@rel="libns:sequenced_by"]',
+                            namespaces=NSMAP)
+        self.assertEqual(len(seq_by), 1)
+        self.assertEqual(seq_by[0].attrib['rel'], 'libns:sequenced_by')
+        seq = seq_by[0].getchildren()
+        self.assertEqual(len(seq), 1)
+        self.assertEqual(seq[0].attrib['about'], '/sequencer/2')
+        self.assertEqual(seq[0].attrib['typeof'], 'libns:Sequencer')
+
+        name = seq[0].xpath('./span[@property="libns:sequencer_name"]')
         self.assertEqual(len(name), 1)
         self.assertEqual(name[0].text, 'Tardigrade')
-        instrument = divs[0].xpath(
+        instrument = seq[0].xpath(
             './span[@property="libns:sequencer_instrument"]')
         self.assertEqual(len(instrument), 1)
         self.assertEqual(instrument[0].text, 'ILLUMINA-EC5D15')
-        model = divs[0].xpath(
+        model = seq[0].xpath(
             './span[@property="libns:sequencer_model"]')
         self.assertEqual(len(model), 1)
         self.assertEqual(model[0].text, 'Illumina Genome Analyzer IIx')
index 5ce9fa2a067403650cc0e1ed0f2dc3d05bd98e82..3a3b736e9d445857c14a6be4e7889cd44fd6017b 100644 (file)
       </thead>
       <tbody>
       {% for lane in lanes %}
-        <tr rel="libns:has_lane" resource="{{lane.get_absolute_url}}" >
-          <td><a href="{{lane.get_absolute_url}}">
-              <span property="libns:lane_number" datatype="xsd:decimal">{{lane.lane_number}}</span></a></td>
-          <td><a href="{{lane.library.get_absolute_url}}"
-                 rel="libns:library"><span property="libns:library_id"
-              >{{lane.library.id}}</span></a>
+        <tr rel="libns:has_lane">
+          <div typeof="libns:IlluminaLane" about="{{lane.get_absolute_url}}">
+          <td>
+            <a href="{{lane.get_absolute_url}}">
+              <span property="libns:lane_number" datatype="xsd:decimal"
+                    >{{lane.lane_number}}</span>
+            </a>
+          </td>
+          <div rel="libns:library">
+          <div typeof="libns:Library" about="{{lane.library.get_absolute_url}}">
+          <td>
+            <a href="{{lane.library.get_absolute_url}}">
+              <span property="libns:library_id"
+                    >{{lane.library.id}}</span></a>
               {% if user.is_staff %}
               <a href="{{lane.library.get_admin_url}}">
                   <img class="icon_button"
                        src="/media/img/admin/icon_changelink.gif"/>
               </a>{% endif %}
           </td>
-          <td><a href="{{lane.library.get_absolute_url}}" rel="libns:library"><span property="libns:name">{{lane.library.library_name}}</span></a></td>
-          <td><a href="{{lane.library.library_species.get_absolute_url}}" rel="libns:species">
+          <td><a href="{{lane.library.get_absolute_url}}">
+              <span property="libns:name">{{lane.library.library_name}}</span>
+          </a></td>
+          <td rel="libns:species">
+            <a href="{{lane.library.library_species.get_absolute_url}}"
+               typeof="libns:Species">
               <span property="libns:species_name">{{ lane.library.library_species.scientific_name }}</span></a></td>
+          </div> <!-- end library class -->
+          </div> <!-- end library relation -->
           <td><span property="libns:comment">{{lane.comment}}</span></td>
+          </div> <!-- end lane -->
         </tr>
       {% endfor %}
       </tbody>
index 953897c5793702c629f1310dd6c5b787f562d213..412d3ac6b16ae4a02d4243f983787677f0bf9173 100644 (file)
@@ -2,9 +2,10 @@
   <h2>About this Flowcell</h2>
   <b>Flowcell</b>:
     <a href="{{flowcell.get_absolute_url}}" property="libns:flowcell_id">{{flowcell.flowcell_id}}</a>{% if user.is_staff %}<a href="{{flowcell.get_admin_url}}"><img class="icon_button" src="/media/img/admin/icon_changelink.gif" alt="Edit"/></a>{% endif%}
-  <br rel="rdf:type" resource="http://jumpgate.caltech.edu/wiki/LibraryOntology#illumina_flowcell"/>
-  <div rel="libns:sequenced_by"
-       resource="{{flowcell.sequencer.get_absolute_url}}">
+  <br rel="rdf:type" resource="http://jumpgate.caltech.edu/wiki/LibraryOntology#IlluminaFlowcell"/>
+  <div rel="libns:sequenced_by">
+  <div typeof="libns:Sequencer"
+       about="{{flowcell.sequencer.get_absolute_url}}">
   <b>Instrument</b>:
     <span property="libns:sequencer_name">{{ flowcell.sequencer.name }}</span>
     {% if flowcell.sequencer.instrument_name %}
@@ -15,6 +16,7 @@
     <span property="libns:sequencer_model">{{flowcell.sequencer.model}}</span>
     <br/>
   </div>
+  </div>
   {% for datarun in flowcell.datarun_set.all %}
   <b>Image Analysis</b>:
     <span property="libns:image_software">{{datarun.image_software}}</span>
index 19480edc3a8dbdd969626a2d7e326ac6b58295c4..6c517e648820940d56fde5e42eebab378e3e0cd1 100644 (file)
@@ -4,27 +4,27 @@
     <!-- App Stuff -->
     <link type="text/css" rel="stylesheet" href="/static/css/app.css" />
     <script type="text/javascript" src="/static/js/jquery.min.js"></script>
-    
+
     {% block additional_javascript %}
     {% endblock %}
 {% endblock %}
 
 {% block content %}
-<div id="lane_detail" class="htswdetail">
+<div id="lane_detail" class="htswdetail" rel="rdf:type" resource="libns:IlluminaLane">
   <div rel="libns:flowcell" resource="{{flowcell.get_absolute_url}}">
   {% include "experiments/flowcell_header.html" %}
   <div class="flowcell_lane_detail">
   <h2>About this lane</h2>
-  <b>Lane</b>: 
+  <b>Lane</b>:
     <span property="libns:lane_number" datatype="xsd:decimal">{{lane.lane_number}}</span><br/>
   <b>pM</b>:
     <span property="libns:pM" datatype="xsd:decimal">{{ lane.pM }}</span><br/>
   <b>Cluster Estimate</b>:
     <span property="libns:cluster_estimate" datatype="xsd:decimal"
           content="{{lane.cluster_estimate}}">{{ lane.cluster_estimate|intcomma }}</span><br/>
-  <b>Lane Status</b>: 
+  <b>Lane Status</b>:
     <span property="libns:status">{{ lane.status }}</span><br/>
-  <b>Comments</b>: 
+  <b>Comments</b>:
     <span property="libns:comment">{{ lane.comment }}</span><br/>
   </div>
   <hr/>
@@ -74,7 +74,7 @@
             </td>
           </tr>
           {% endif %}
-          {% endfor %} 
+          {% endfor %}
        </tbody>
      </table>
   {% endfor %}
index b7454e392d82daf737337a5e29a78b786ad4e1b8..c3d5f979628c388754e4f502f3aa0da9880dc2eb 100644 (file)
@@ -1,4 +1,6 @@
-<div id="librarydetail"  about="{{lib.get_absolute_url}}" typeof="libns:library">
+<div id="librarydetail"
+     about="{{lib.get_absolute_url}}"
+     typeof="libns:Library">
   <div class="library_identity">
     <h2>Library Name</h2>
     <b>Library ID</b>:
index c09a510aa5c676f23164ec631cd3f3a3b01b624c..66b2cc12301cafa252af0fcf0c8d33d0327a174b 100644 (file)
     <tbody>
       {% for lane in lib.lane_set.all %}
       <tr rel="libns:has_lane" resource="{{lane.get_absolute_url}}">
-        <td><a href="{{lane.flowcell.get_absolute_url}}" rel="libns:flowcell"
-            ><span property="libns:flowcell_id">{{ lane.flowcell.flowcell_id }}</span></a>
+        <td>
+          <a typeof="libns:IlluminaFlowcell" href="{{lane.flowcell.get_absolute_url}}">
+            <span property="libns:flowcell_id"
+                  >{{lane.flowcell.flowcell_id}}</span></a>
           {% if user.is_staff %}
             <a href="{{lane.flowcell.get_admin_url}}">
                <img class="icon_button"
             </a>
           {% endif%}
         </td>
-        <td><a href="{{lane.get_absolute_url}}">
+        <td typeof="libns:IlluminaLane" about="{{lane.get_absolute_url}}">
+          <span rel="libns:flowcell" resource="{{lane.flowcell.get_absolute_url}}"></span>
+          <a href="{{lane.get_absolute_url}}">
             <span property="libns:lane_number"  datatype="xsd:decimal"
-               >{{ lane.lane_number }}</span></a></td>
+               >{{ lane.lane_number }}</span>
+            </a></td>
         <td>{{ lane.comment }}</td>
       </tr>
          {% endfor %}
index 23e7fe8cfa401a52a52e936e91e3182311ed851f..479ce3ddcd330fa5fe4285215ed08edb81e1c710 100644 (file)
@@ -477,7 +477,7 @@ def guess_library_from_model(model, base_url, flowcell, lane_id):
     select ?library ?lane
     where {{
       <{flowcell}> libNS:has_lane ?lane ;
-                   a libNS:illumina_flowcell .
+                   a libNS:IlluminaFlowcell .
       ?lane libNS:lane_number {lane_id} ;
             libNS:library ?library .
     }}
index 34ddeab593da5636386a7f0a7254a5e2af1b53e4..e621875231d60a0da1207b1e2fb53d0e5c121fbe 100644 (file)
@@ -408,7 +408,7 @@ class SequenceFileTests(unittest.TestCase):
     libns:has_lane <{base}/lane/1169>, <{base}/lane/1170>,
                    <{base}/lane/1171>, <{base}/lane/1172> ;
     libns:read_length 75 ;
-    a libns:illumina_flowcell .
+    a libns:IlluminaFlowcell .
 
 <{base}/lane/1169>
     libns:lane_number 1 ; libns:library <{base}/library/10923/> .
index 9aab790ef6fb6262e998b7ef1b4c2689144d5e88..5ae4b7a01f481f1b75e181c50d919010fb4b3d26 100644 (file)
@@ -169,7 +169,7 @@ class CondorFastqExtract(object):
     def import_library(self, library):
         """Import library data into our model if we don't have it already
         """
-        q = RDF.Statement(library, rdfNS['type'], libraryOntology['library'])
+        q = RDF.Statement(library, rdfNS['type'], libraryOntology['Library'])
         present = False
         if not self.model.contains_statement(q):
             present = True
@@ -184,7 +184,7 @@ prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
 select distinct ?flowcell ?flowcell_id
 WHERE {
-  ?library a libns:library ;
+  ?library a libns:Library ;
            libns:has_lane ?lane .
   ?lane libns:flowcell ?flowcell .
   ?flowcell libns:flowcell_id ?flowcell_id .
@@ -195,7 +195,7 @@ WHERE {
             LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids)))
             flowcell_test = RDF.Statement(r['flowcell'],
                                           rdfNS['type'],
-                                          libraryOntology['illumina_flowcell'])
+                                          libraryOntology['IlluminaFlowcell'])
             if not self.model.contains_statement(flowcell_test):
                 # we probably lack full information about the flowcell.
                 load_into_model(self.model, 'rdfa', r['flowcell'])
index 94df7b60b2f1e7dbca6d5e2fc2ecbacded55f30a..f4d2e2c71aef39fc8f32dbb6058fa8db71510af9 100644 (file)
@@ -9,7 +9,9 @@ import unittest
 
 from htsworkflow.submission.condorfastq import CondorFastqExtract
 from htsworkflow.submission.results import ResultMap
-from htsworkflow.util.rdfhelp import load_string_into_model, dump_model
+from htsworkflow.util.rdfhelp import \
+     add_default_schemas, load_string_into_model, dump_model
+from htsworkflow.util.rdfinfer import Infer
 
 FCDIRS = [
     'C02F9ACXX',
@@ -96,8 +98,13 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
 @prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
 @prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
 
+<http://localhost/library/10000/> a libns:Library .
+<http://localhost/library/1331/> a libns:Library .
+<http://localhost/library/1421/> a libns:Library .
+<http://localhost/library/1661/> a libns:Library .
+
 <http://localhost/flowcell/30221AAXX/>
-        a libns:illumina_flowcell ;
+        a libns:IlluminaFlowcell ;
         libns:read_length 33 ;
         libns:flowcell_type "Single"@en ;
         libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
@@ -112,18 +119,22 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         libns:flowcell_id "30221AAXX"@en .
 
 <http://localhost/lane/3401>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 1 .
 <http://localhost/lane/3402>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 2 .
 <http://localhost/lane/3403>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 3 .
 <http://localhost/lane/3404>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
         libns:lane_number 4 .
@@ -131,24 +142,28 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # read_length 33;
         # status "Unknown"@en .
 <http://localhost/lane/3405>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 5 .
 <http://localhost/lane/3406>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 6 .
 <http://localhost/lane/3407>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 7 .
 <http://localhost/lane/3408>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         libns:lane_number 8 .
 
 <http://localhost/flowcell/42JUYAAXX/>
-        a libns:illumina_flowcell ;
+        a libns:IlluminaFlowcell ;
         libns:read_length 76 ;
         libns:flowcell_type "Paired"@en ;
         libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
@@ -163,22 +178,27 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         libns:flowcell_id "42JUYAAXX"@en .
 
 <http://localhost/lane/4201>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 1 .
 <http://localhost/lane/4202>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 2 .
 <http://localhost/lane/4203>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 3 .
 <http://localhost/lane/4204>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 4 .
 <http://localhost/lane/4205>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
         libns:lane_number 5 .
@@ -186,20 +206,23 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # read_length 76;
         # status "Unknown"@en .
 <http://localhost/lane/4206>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 6 .
 <http://localhost/lane/4207>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 7 .
 <http://localhost/lane/4208>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         libns:lane_number 8 .
 
 <http://localhost/flowcell/61MJTAAXX/>
-        a libns:illumina_flowcell ;
+        a libns:IlluminaFlowcell ;
         libns:read_length 76 ;
         libns:flowcell_type "Single"@en ;
         libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
@@ -214,26 +237,32 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         libns:flowcell_id "61MJTAAXX"@en .
 
 <http://localhost/lane/6601>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 1 .
 <http://localhost/lane/6602>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 2 .
 <http://localhost/lane/6603>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 3 .
 <http://localhost/lane/6604>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 4 .
 <http://localhost/lane/6605>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 5 .
 <http://localhost/lane/6606>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
         libns:lane_number 6 .
@@ -241,16 +270,18 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # read_length 76;
         # status "Unknown"@en .
 <http://localhost/lane/6607>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 7 .
 <http://localhost/lane/6608>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         libns:lane_number 8 .
 
 <http://localhost/flowcell/30DY0AAXX/>
-        a libns:illumina_flowcell ;
+        a libns:IlluminaFlowcell ;
         libns:read_length 76 ;
         libns:flowcell_type "Paired"@en ;
         libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
@@ -265,34 +296,42 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         libns:flowcell_id "30DY0AAXX"@en .
 
 <http://localhost/lane/3801>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 1 .
 <http://localhost/lane/3802>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 2 .
 <http://localhost/lane/3803>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 3 .
 <http://localhost/lane/3804>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 4 .
 <http://localhost/lane/3805>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 5 .
 <http://localhost/lane/3806>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 6 .
 <http://localhost/lane/3807>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         libns:lane_number 7 .
 <http://localhost/lane/3808>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
         libns:lane_number 8 .
@@ -301,7 +340,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # status "Unknown"@en .
 
 <http://localhost/flowcell/C02F9ACXX/>
-        a libns:illumina_flowcell ;
+        a libns:IlluminaFlowcell ;
         libns:read_length 101 ;
         libns:flowcell_type "Paired"@en ;
         libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
@@ -310,6 +349,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         libns:flowcell_id "C02F9ACXX"@en .
 
 <http://localhost/lane/12300>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/12345/> ;
         libns:lane_number 3 .
@@ -318,6 +358,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # status "Unknown"@en .
 
 <http://localhost/lane/12500>
+        a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/11154/> ;
         libns:lane_number 3 .
@@ -326,7 +367,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         # status "Unknown"@en .
 
 <http://localhost/library/11154/>
-        a libns:library ;
+        a libns:Library ;
         libns:affiliation "TSR"@en;
         libns:concentration "29.7";
         libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
@@ -350,7 +391,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
 
 
 <http://localhost/library/12345/>
-        a libns:library ;
+        a libns:Library ;
         libns:affiliation "TSR"@en;
         libns:concentration "12.345";
         libns:cell_line "Unknown"@en ;
@@ -401,6 +442,10 @@ class TestCondorFastq(unittest.TestCase):
                                           self.flowcelldir,
                                           self.logdir)
         load_string_into_model(self.extract.model, 'turtle', lib_turtle)
+        add_default_schemas(self.extract.model)
+        inference = Infer(self.extract.model)
+        errmsgs = list(inference.run_validation())
+        self.assertEqual(len(errmsgs), 0)
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
diff --git a/htsworkflow/util/schemas/htsworkflow.turtle b/htsworkflow/util/schemas/htsworkflow.turtle
new file mode 100644 (file)
index 0000000..7319c0c
--- /dev/null
@@ -0,0 +1,340 @@
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+
+# I'm still learning how to do modeling & validation;
+# this version just uses rdfs:Literal as the range of
+# every literal-valued property.
+
+<http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    dc:title "HTS-Workflow ontology" ;
+    a owl:Ontology .
+
+htswlib:Class a rdfs:Class .
+
+htswlib:IlluminaFlowcell
+    a rdfs:Class, htswlib:Class ;
+    rdfs:comment "information about an Illumina flowcell" ;
+    rdfs:label "Flowcell" .
+
+htswlib:IlluminaLane
+    a rdfs:Class, htswlib:Class ;
+    rdfs:comment "information specific to a lane in a flowcell" ;
+    rdfs:label "Lane" .
+
+htswlib:Library
+    a rdfs:Class, htswlib:Class ;
+    rdfs:comment "a biological library that can be run on a flowcell" ;
+    rdfs:label "library" .
+
+htswlib:Sequencer
+    a rdfs:Class, htswlib:Class;
+    rdfs:comment "an instrument that converts libraries into sequence reads" ;
+    rdfs:label "Sequencer" .
+
+htswlib:Species
+    a rdfs:Class, htswlib:Class ;
+    rdfs:comment "What 'species' was our sample created from." ;
+    rdfs:label "Species" .
+
+htswlib:MappedCount
+    a rdfs:Class, htswlib:Class ;
+    rdfs:comment "Count of reads with locations" ;
+    rdfs:label "Mapped locations" .
+
+# reused properties?
+htswlib:flowcell
+    a rdf:Property ;
+    rdfs:comment "link to our source flowcell" ;
+    rdfs:label "Flowcell" ;
+    rdfs:domain htswlib:IlluminaLane ;
+    rdfs:range htswlib:IlluminaFlowcell .
+
+htswlib:has_lane
+    a rdf:Property ;
+    rdfs:comment "which lanes are attached to a flowcell" ;
+    rdfs:label "lanes" ;
+    # cheating: flowcells & libraries both have lanes, but RDFS reads multiple rdfs:domain as an intersection
+    rdfs:domain htswlib:Library ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range htswlib:IlluminaLane .
+
+htswlib:date
+    a rdf:Property ;
+    rdfs:comment "Date thing was constructed" ;
+    rdfs:label "made on" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:total_unique_locations
+    a rdf:Property ;
+    rdfs:comment "How many reads mapped uniquely" ;
+    rdfs:label "Unique locations" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:domain htswlib:IlluminaLane ;
+    rdfs:range rdfs:Literal .
+
+htswlib:has_mappings
+    a rdf:Property ;
+    rdfs:comment "collection of counts against sequence collections" ;
+    rdfs:label "Has mapping" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:domain htswlib:IlluminaLane ;
+    rdfs:range htswlib:MappedCount .
+
+# flowcell properties
+htswlib:flowcell_id
+    a rdf:Property ;
+    rdfs:comment "Serial number of a flowcell, used as unique key" ;
+    rdfs:label "Flowcell ID" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:flowcell_notes
+    a rdf:Property ;
+    rdfs:comment "Comments about flowcell" ;
+    rdfs:label "Notes" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:control_lane
+    a rdf:Property ;
+    rdfs:comment "Control lane used for image analysis/base calling" ;
+    rdfs:label "Control Lane" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:read_length
+    a rdf:Property ;
+    rdfs:comment "How many cycles did we run the sequencer for" ;
+    rdfs:label "Read length" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:flowcell_type
+    a rdf:Property ;
+    rdfs:comment "What kind of flowcell, e.g. single, paired." ;
+    rdfs:label "Flowcell Type" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:image_software
+    a rdf:Property ;
+    rdfs:comment "Image analysis software name." ;
+    rdfs:label "Image processor" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:image_version
+    a rdf:Property ;
+    rdfs:comment "Image analysis software version identifier." ;
+    rdfs:label "Image processor version" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:basecall_software
+    a rdf:Property ;
+    rdfs:comment "Base caller software name." ;
+    rdfs:label "Basecaller" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:basecall_version
+    a rdf:Property ;
+    rdfs:comment "Base caller software version identifier." ;
+    rdfs:label "Basecaller Version" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:alignment_software
+    a rdf:Property ;
+    rdfs:comment "Alignment software name." ;
+    rdfs:label "Alignment software" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:alignment_version
+    a rdf:Property ;
+    rdfs:comment "Alignment software version identifier." ;
+    rdfs:label "Alignment software version" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range rdfs:Literal .
+
+htswlib:sequenced_by
+    a rdf:Property ;
+    rdfs:comment "Which sequencer sequenced this flowcell" ;
+    rdfs:label "Sequenced By" ;
+    rdfs:domain htswlib:IlluminaFlowcell ;
+    rdfs:range htswlib:Sequencer .
+
+htswlib:sequencer_model
+    a rdf:Property ;
+    rdfs:comment "What model of sequencer was this flowcell run on." ;
+    rdfs:label "Sequencer model" ;
+    rdfs:domain htswlib:Sequencer ;
+    rdfs:range rdfs:Literal .
+
+htswlib:sequencer_instrument
+    a rdf:Property ;
+    rdfs:comment "Sequencer instrument identifier." ;
+    rdfs:label "Instrument ID" ;
+    rdfs:domain htswlib:Sequencer ;
+    rdfs:range rdfs:Literal .
+
+htswlib:sequencer_name
+    a rdf:Property ;
+    rdfs:comment "Local name for sequencer." ;
+    rdfs:label "Sequencer name" ;
+    rdfs:domain htswlib:Sequencer ;
+    rdfs:range rdfs:Literal .
+
+
+# library only properties
+htswlib:library_id
+    a rdf:Property ;
+    rdfs:comment "Library unique identifier" ;
+    rdfs:label "Library ID" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:name
+    a rdf:Property ;
+    rdfs:comment "human readable name" ;
+    rdfs:label "Name" ;
+    rdfs:range rdfs:Literal .
+
+htswlib:library_type
+    a rdf:Property ;
+    rdfs:comment """what type of library are we, which is primarily
+                 determined by the type of adaptors ligated onto
+                 sequence fragments.""" ;
+    rdfs:label "Library Type" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:stopping_point
+    a rdf:Property ;
+    rdfs:comment "Protocol stopping point" ;
+    rdfs:label "Stopping Point" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:concentration
+    a rdf:Property ;
+    rdfs:comment "Concentration of sample in nanograms per microliter" ;
+    rdfs:label "Concentration" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:insert_size
+    a rdf:Property ;
+    rdfs:comment "The expected size of the sample sequence" ;
+    rdfs:label "Insert Size" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:gel_cut
+    a rdf:Property ;
+    rdfs:comment "The estimated fragment sizes cut from gel";
+    rdfs:label "Gel Cut" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:made_by
+    a rdf:Property ;
+    rdfs:comment "Name of person who created library" ;
+    rdfs:label "Made by" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:multiplex_index
+    a rdf:Property ;
+    rdfs:comment "multiplex sequence" ;
+    rdfs:label "index" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:cell_line
+    a rdf:Property ;
+    rdfs:comment "Description of source material" ;
+    rdfs:label "cell line" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:experiment_type
+    a rdf:Property ;
+    rdfs:comment "What type of sequencing is this. e.g. RNA-Seq, DeNovo etc." ;
+    rdfs:label "Experiment Type" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:affiliation
+    a rdf:Property ;
+    rdfs:comment "Entity associated with this sample" ;
+    rdfs:label "affiliation" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+htswlib:species
+    a rdf:Property ;
+    rdfs:comment "Scientific name of sample species" ;
+    rdfs:label "Species" ;
+    rdfs:domain htswlib:Library ;
+    #rdfs:domain htswlib:IlluminaLane;
+    rdfs:range htswlib:Species .
+
+htswlib:replicate
+    a rdf:Property ;
+    rdfs:comment "Which replicate this was" ;
+    rdfs:label "replicate" ;
+    rdfs:domain htswlib:Library ;
+    rdfs:range rdfs:Literal .
+
+# lane only properties
+htswlib:lane_number
+    a rdf:Property ;
+    rdfs:comment "Which lane were we run in" ;
+    rdfs:label "lane id" ;
+    rdfs:domain htswlib:IlluminaLane ;
+    rdfs:range rdfs:Literal .
+
+# FIXME: should this be note?
+htswlib:comment
+    a rdf:Property ;
+    rdfs:comment "Comment about an object" ;
+    rdfs:range rdfs:Literal .
+
+htswlib:library
+     a rdf:Property ;
+     rdfs:comment "which library was run on this lane." ;
+     rdfs:label "Library" ;
+     rdfs:domain htswlib:IlluminaLane ;
+     rdfs:range htswlib:Library .
+
+# species properties
+htswlib:species_name
+     a rdf:Property ;
+     rdfs:comment "Scientific name of species" ;
+     rdfs:label "Species" ;
+     rdfs:domain htswlib:Species ;
+     rdfs:range rdfs:Literal .
+
+# mapped count properties
+htswlib:mapped_to
+     a rdf:Property ;
+     rdfs:comment "A sequence collection, genome, chromosome, spike in, etc.";
+     rdfs:label "Mapped to" ;
+     rdfs:domain htswlib:MappedCount ;
+     rdfs:range rdfs:Literal .
+
+htswlib:reads
+     a rdf:Property ;
+     rdfs:comment "Number of read locations" ;
+     rdfs:label "Reads" ;
+     rdfs:domain htswlib:MappedCount ;
+     rdfs:range rdfs:Literal .
\ No newline at end of file