From 89c191828b16e8385685eb7ef18f82bc751e5b3b Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Mon, 24 Sep 2012 15:28:10 -0700 Subject: [PATCH] Make the public html pages valid xhtml, and validate more RDFa cases. Also after I spent time playing with the w3c online validator, I decided it was best to try and add modest validation to my unit tests. So now there's a validate_xhtml function in ethelp. The one really weird thing is I tried to load the DTD in the test case, however it looks like librdf clobbered the XML catalog resolver at some point so the DTD resolver can't find anything. --- htsworkflow/frontend/experiments/tests.py | 79 +++++++++++++++++-- htsworkflow/frontend/samples/tests.py | 10 ++- htsworkflow/frontend/templates/base.html | 3 +- .../experiments/flowcell_detail.html | 68 +++++++++------- .../experiments/flowcell_header.html | 12 +-- .../experiments/flowcell_lane_detail.html | 31 +++++--- .../frontend/templates/sample_header.html | 6 +- .../templates/samples/library_detail.html | 15 +++- htsworkflow/util/ethelp.py | 48 ++++++++++- htsworkflow/util/schemas/htsworkflow.turtle | 29 +++++++ 10 files changed, 236 insertions(+), 65 deletions(-) diff --git a/htsworkflow/frontend/experiments/tests.py b/htsworkflow/frontend/experiments/tests.py index 93e1f3d..8f535c8 100644 --- a/htsworkflow/frontend/experiments/tests.py +++ b/htsworkflow/frontend/experiments/tests.py @@ -17,6 +17,7 @@ from django.test import TestCase from htsworkflow.frontend.experiments import models from htsworkflow.frontend.experiments import experiments from htsworkflow.frontend.auth import apidata +from htsworkflow.util.ethelp import validate_xhtml from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR @@ -256,6 +257,7 @@ class ExperimentsTestCases(TestCase): u'11061',u'11062',u'11063',u'11064'] self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5') response = self.client.get('/admin/experiments/flowcell/153/') + tree = fromstring(response.content) for i 
in range(0,8): xpath_expression = '//input[@id="id_lane_set-%d-library"]' @@ -274,6 +276,10 @@ class ExperimentsTestCases(TestCase): """ self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5') response = self.client.get('/library/11070/') + self.assertEqual(response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + tree = fromstring(response.content) flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]', namespaces=NSMAP) @@ -285,8 +291,14 @@ class ExperimentsTestCases(TestCase): self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/') fc_response = self.client.get(failed_fc_a.get('href')) self.assertEqual(fc_response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + fc_lane_response = self.client.get('/flowcell/30012AAXX/8/') self.assertEqual(fc_lane_response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + def test_pooled_multiplex_id(self): fc_dict = experiments.flowcell_information('42JU1AAXX') @@ -423,17 +435,20 @@ class ExperimentsTestCases(TestCase): model = get_model() - expected = {1: ['11034'], - 2: ['11036'], - 3: ['12044','11045'], - 4: ['11047','13044'], - 5: ['11055'], - 6: ['11067'], - 7: ['11069'], - 8: ['11070']} + expected = {'1': ['11034'], + '2': ['11036'], + '3': ['12044','11045'], + '4': ['11047','13044'], + '5': ['11055'], + '6': ['11067'], + '7': ['11069'], + '8': ['11070']} url = '/flowcell/42JU1AAXX/' response = self.client.get(url) self.assertEqual(response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + ns = urljoin('http://localhost', url) load_string_into_model(model, 'rdfa', response.content, ns=ns) body = """prefix rdf: @@ -610,3 +625,51 @@ class TestSequencer(TestCase): './span[@property="libns:sequencer_model"]') self.assertEqual(len(model), 1) 
self.assertEqual(model[0].text, 'Illumina Genome Analyzer IIx') + + def test_flowcell_with_rdf_validation(self): + from htsworkflow.util.rdfhelp import add_default_schemas, \ + dump_model, \ + get_model, \ + load_string_into_model + from htsworkflow.util.rdfinfer import Infer + + model = get_model() + add_default_schemas(model) + inference = Infer(model) + + url ='/flowcell/FC12150/' + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + + load_string_into_model(model, 'rdfa', response.content) + + errmsgs = list(inference.run_validation()) + self.assertEqual(len(errmsgs), 2) + for errmsg in errmsgs: + self.assertEqual(errmsg, 'Missing type for: http://localhost/') + + def test_lane_with_rdf_validation(self): + from htsworkflow.util.rdfhelp import add_default_schemas, \ + dump_model, \ + get_model, \ + load_string_into_model + from htsworkflow.util.rdfinfer import Infer + + model = get_model() + add_default_schemas(model) + inference = Infer(model) + + url = '/lane/1193' + response = self.client.get(url) + self.assertEqual(response.status_code, 200) + status = validate_xhtml(response.content) + if status is not None: self.assertTrue(status) + + load_string_into_model(model, 'rdfa', response.content) + + errmsgs = list(inference.run_validation()) + self.assertEqual(len(errmsgs), 2) + for errmsg in errmsgs: + self.assertEqual(errmsg, 'Missing type for: http://localhost/') diff --git a/htsworkflow/frontend/samples/tests.py b/htsworkflow/frontend/samples/tests.py index daabc11..bfe4b0e 100644 --- a/htsworkflow/frontend/samples/tests.py +++ b/htsworkflow/frontend/samples/tests.py @@ -20,7 +20,7 @@ from htsworkflow.frontend.samples.views import \ from htsworkflow.frontend.auth import apidata from htsworkflow.util.conversion import unicode_or_none - +from htsworkflow.util.ethelp import validate_xhtml class LibraryTestCase(TestCase): fixtures = 
['test_samples.json'] @@ -149,7 +149,8 @@ class SampleWebTestCase(TestCase): response = self.client.get('/library/10981/') self.assertEqual(response.status_code, 200) - load_string_into_model(model, 'rdfa', response.content) + content = response.content + load_string_into_model(model, 'rdfa', content) body = """prefix rdf: prefix libns: @@ -170,6 +171,9 @@ class SampleWebTestCase(TestCase): self.assertEqual(fromTypedNode(r['gel_cut']), 400) self.assertEqual(fromTypedNode(r['made_by']), u'Igor') + state = validate_xhtml(content) + if state is not None: self.assertTrue(state) + def test_library_index_rdfa(self): from htsworkflow.util.rdfhelp import \ add_default_schemas, get_model, load_string_into_model @@ -212,6 +216,8 @@ class SampleWebTestCase(TestCase): self.assertEqual(count, len(Library.objects.filter(hidden=False))) + state = validate_xhtml(response.content) + if state is not None: self.assertTrue(state) # The django test runner flushes the database between test suites not cases, # so to be more compatible with running via nose we flush the database tables # of interest before creating our sample data. diff --git a/htsworkflow/frontend/templates/base.html b/htsworkflow/frontend/templates/base.html index e623d0d..f8899df 100644 --- a/htsworkflow/frontend/templates/base.html +++ b/htsworkflow/frontend/templates/base.html @@ -3,7 +3,7 @@ "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd"> diff --git a/htsworkflow/frontend/templates/experiments/flowcell_detail.html b/htsworkflow/frontend/templates/experiments/flowcell_detail.html index 3a3b736..4183c99 100644 --- a/htsworkflow/frontend/templates/experiments/flowcell_detail.html +++ b/htsworkflow/frontend/templates/experiments/flowcell_detail.html @@ -10,9 +10,9 @@ {% endblock %} {% block content %} -
+
{% include "experiments/flowcell_header.html" %} -
+

Lanes

@@ -26,18 +26,16 @@ {% for lane in lanes %} - -
-
+ -
-
-
- - + - - - - + {{ lane.library.library_species.scientific_name }} + + {% endfor %}
- - {{lane.lane_number}} +
+ + {{lane.lane_number}} - + + {{lane.library.id}} {% if user.is_staff %} @@ -46,25 +44,29 @@ src="/media/img/admin/icon_changelink.gif"/> {% endif %} - {{lane.library.library_name}} - + + + {{lane.library.library_name}} + + - {{ lane.library.library_species.scientific_name }} {{lane.comment}} + {{lane.comment}} +
-
{% for run in flowcell.datarun_set.all %}

Run {{ run.runfolder_name }}

+ {% if run.lane_files %} @@ -74,34 +76,41 @@ + {% for lane_id, lane_file_set in run.lane_files.items %} - {% if lane_file_set.ivc_all %} + {% if lane_file_set.ivc_all %} @@ -109,7 +118,10 @@ {% endfor %}
IVC Percent Base IVC Percent Base All IVC Percent Base Called
{{ lane_id }} - + Lane {{lane_id }} IVC All - + Lane {{lane_id }} IVC Call - + Lane {{lane_id }} IVC % Base - + Lane {{lane_id }} IVC % Base All - + Lane {{lane_id }} IVC % Base Called
+ {% endif %} {% endfor %}
+
+ {% endblock %} diff --git a/htsworkflow/frontend/templates/experiments/flowcell_header.html b/htsworkflow/frontend/templates/experiments/flowcell_header.html index 412d3ac..f71ae0f 100644 --- a/htsworkflow/frontend/templates/experiments/flowcell_header.html +++ b/htsworkflow/frontend/templates/experiments/flowcell_header.html @@ -1,8 +1,8 @@ -
+

About this Flowcell

Flowcell: - {{flowcell.flowcell_id}}{% if user.is_staff %}Edit{% endif%} -
+ {{flowcell.flowcell_id}}{% if user.is_staff %}Edit{% endif%} +
@@ -33,10 +33,10 @@ Type: {{flowcell.flowcell_type}}
Read Length: - {{flowcell.read_length}}
+ {{flowcell.read_length}}
Control Lane: - {{flowcell.control_lane}}
+ {{flowcell.control_lane}}
Notes:
{{flowcell.notes}}
-
+
diff --git a/htsworkflow/frontend/templates/experiments/flowcell_lane_detail.html b/htsworkflow/frontend/templates/experiments/flowcell_lane_detail.html index 33c47fb..7e834eb 100644 --- a/htsworkflow/frontend/templates/experiments/flowcell_lane_detail.html +++ b/htsworkflow/frontend/templates/experiments/flowcell_lane_detail.html @@ -10,23 +10,26 @@ {% endblock %} {% block content %} -
+
{% include "experiments/flowcell_header.html" %}

About this lane

Lane: - {{lane.lane_number}}
+ {{lane.lane_number}}
pM: {{ lane.pM }}
+ {% if lane.cluster_estimate %} Cluster Estimate: {{ lane.cluster_estimate|intcomma }}
+ content="{{lane.cluster_estimate}}">{{ lane.cluster_estimate|intcomma }}
{% endif %} + {% if lane.status %} Lane Status: - {{ lane.status }}
+ {{ lane.status }}
{% endif %} + {% if lane.comment %} Comments: - {{ lane.comment }}
+ {{ lane.comment }}
{% endif %}

{% include "sample_header.html" %} @@ -52,29 +55,37 @@ {{lane_number}} - + Lane {{lane_id }} IVC All - + Lane {{lane_id }} IVC Call - + Lane {{lane_id }} IVC % Base - + Lane {{lane_id }} IVC % Base All - + Lane {{lane_id }} IVC % Base Called + {% else %} + No data {% endif %} {% endfor %} diff --git a/htsworkflow/frontend/templates/sample_header.html b/htsworkflow/frontend/templates/sample_header.html index d38d910..8dc14ee 100644 --- a/htsworkflow/frontend/templates/sample_header.html +++ b/htsworkflow/frontend/templates/sample_header.html @@ -5,7 +5,7 @@

Library Name

Library ID: {{ lib.id }} - {% if user.is_staff %}{% endif %} + {% if user.is_staff %}Edit{% endif %}
Name: {{ lib.library_name }} @@ -70,12 +70,12 @@
{% if lib.gel_cut_size %} Gel Cut Size: - {{ lib.gel_cut_size }} + {{ lib.gel_cut_size }}
{% endif %} {% if lib.insert_size %} Insert Size: - {{ lib.insert_size }} + {{ lib.insert_size }}
{% endif %} {% if lib.undiluted_concentration %} diff --git a/htsworkflow/frontend/templates/samples/library_detail.html b/htsworkflow/frontend/templates/samples/library_detail.html index b7a182e..9cc76f5 100644 --- a/htsworkflow/frontend/templates/samples/library_detail.html +++ b/htsworkflow/frontend/templates/samples/library_detail.html @@ -28,6 +28,7 @@ + {% if eland_results %} {% for result in eland_results %} {{ result.run_date|date}} @@ -48,6 +49,9 @@ {% endfor %} + {% else %} + No data + {% endif %} @@ -85,7 +89,7 @@ - + {% if lane_summary_list %} {# ls short for lane summary #} {% for ls in lane_summary_list %} @@ -111,8 +115,11 @@ {{ ls.repeat_reads|intcomma }} {% endfor %} - - + {% else %} + No data + {% endif %} + +

Flowcell Notes

@@ -123,6 +130,7 @@ + {% if lib.lane_set.all %} {% for lane in lib.lane_set.all %} @@ -147,6 +155,7 @@ {% endfor %} + {% endif %}
Comment


diff --git a/htsworkflow/util/ethelp.py b/htsworkflow/util/ethelp.py index 19f6c9f..e4fe897 100644 --- a/htsworkflow/util/ethelp.py +++ b/htsworkflow/util/ethelp.py @@ -1,6 +1,15 @@ +"""ElementTree helper functions """ -ElementTree helper functions -""" +import logging +import os +LOGGER = logging.getLogger(__name__) + +import lxml.etree +try: + XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN') +except lxml.etree.DTDParseError as e: + LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),)) + def indent(elem, level=0): """ reformat an element tree to be 'pretty' (indented) @@ -21,7 +30,7 @@ def indent(elem, level=0): def flatten(elem, include_tail=0): """ - Extract the text from an element tree + Extract the text from an element tree (AKA extract the text that not part of XML tags) """ text = elem.text or "" @@ -30,3 +39,36 @@ def flatten(elem, include_tail=0): if include_tail and elem.tail: text += elem.tail return text +def validate_xhtml(html, base_url='http://localhost'): + """Helper for validating xhtml, mostly intended for test code + + Defaults to assuming XHTML+RDFa + Returns None if there was a problem configuring validation + Logs messages from lxml.etree using python logging + Returns True if it passed validation + and False if it fails. + """ + if XHTML_RDF_DTD is None: + return None + + try: + root = lxml.etree.fromstring(html, base_url=base_url) + except lxml.etree.ParseError as e: + LOGGER.warn("Unable to parse document: %s" % (str(e),)) + return False + + if XHTML_RDF_DTD.validate(root): + # so unlikely. 
+ return True + + isgood = True + for msg in XHTML_RDF_DTD.error_log.filter_from_errors(): + # I have no idea how to suppress this error + # but I need the xmlns attributes for my RDFa 1.0 encoding + if 'ERROR:VALID:DTD_UNKNOWN_ATTRIBUTE' in str(msg): + continue + else: + LOGGER.error(msg) + isgood = False + + return isgood diff --git a/htsworkflow/util/schemas/htsworkflow.turtle b/htsworkflow/util/schemas/htsworkflow.turtle index 7319c0c..92ed6e6 100644 --- a/htsworkflow/util/schemas/htsworkflow.turtle +++ b/htsworkflow/util/schemas/htsworkflow.turtle @@ -193,6 +193,28 @@ htswlib:sequencer_name rdfs:domain htswlib:Sequencer ; rdfs:range rdfs:Literal . +# lane properties +htswlib:status + a rdf:Property ; + rdfs:comment "Operator's opinion of lane status, e.g. were there spots in the pictures" ; + rdfs:label "Status" ; + rdfs:domain htswlib:IlluminaLane ; + rdfs:range rdfs:Literal . + +htswlib:cluster_estimate + a rdf:Property ; + rdfs:comment "Estimate of clusters per tile" ; + rdfs:label "Cluster Estimate" ; + rdfs:domain htswlib:IlluminaLane ; + rdfs:range rdfs:Literal . + +htswlib:pM + a rdf:Property ; + rdfs:comment "picoMolarity" ; + rdfs:label "picoMolarity" ; + rdfs:domain htswlib:IlluminaLane ; + rdfs:range rdfs:Literal . + # library only properties htswlib:library_id @@ -217,6 +239,13 @@ htswlib:library_type rdfs:domain htswlib:Library ; rdfs:range rdfs:Literal . +htswlib:condition + a rdf:Property ; + rdfs:comment "Describes what treatment has been applied to the cells" ; + rdfs:label "Condition" ; + rdfs:domain htswlib:Library ; + rdfs:range rdfs:Literal . + htswlib:stopping_point a rdf:Property ; rdfs:comment "Protocol stopping point" ; -- 2.30.2