Merge branch 'django1.4' of mus.cacr.caltech.edu:htsworkflow into django1.4

author Diane Trout <diane@ghic.org>

Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)

committer Diane Trout <diane@ghic.org>

Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
author Diane Trout <diane@ghic.org>
Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
committer Diane Trout <diane@ghic.org>
Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
diff --git a/encode_submission/encode3.py b/encode_submission/encode3.py

index 875d3bdd9d8205df2eaff65b2b48b5462366645a..a77151e1ead7d2a91091a58a4451a01bcd569d72 100644 (file)
--- a/encode_submission/encode3.py
+++ b/encode_submission/encode3.py
@@ -77,6 +77,12 @@ def main(cmdline=None):
              INDENTED.join(submission_names)))
      elif len(submission_names) == 1:
          name = submission_names[0]
+        
+    if name:
+        submission_uri = get_submission_uri(name)
+        logger.info('Submission URI: %s', submission_uri)
+    else:
+        logger.debug('No name, unable to create submission uri')
  
      mapper = None
      if opts.make_track_hub:
@@ -85,8 +91,6 @@ def main(cmdline=None):
                                      baseurl=opts.make_track_hub,
                                      baseupload=opts.track_hub_upload,
                                      host=opts.host)
-        submission_uri = get_submission_uri(name)
-
  
      if opts.load_rdf is not None:
          if submission_uri is None:
@@ -117,6 +121,8 @@ def main(cmdline=None):
      if opts.scan_submission:
          if name is None:
              parser.error("Please define a submission name")
+        if mapper is None:
+            parser.error("Scan submission needs --make-track-hub=public-url")
          mapper.scan_submission_dirs(results)
  
      if opts.make_track_hub:
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py

index 12a5154c33a01bf15d1349040e7bfd2f71f51df8..626897061ea4d9e4c78141a4cba65c4713011507 100644 (file)
--- a/htsworkflow/submission/submission.py
+++ b/htsworkflow/submission/submission.py
@@ -21,6 +21,9 @@ from htsworkflow.submission.daf import \
       ModelException, \
       get_submission_uri
  
+from django.conf import settings
+from django.template import Context, Template, loader
+
  LOGGER = logging.getLogger(__name__)
  
  class Submission(object):
@@ -121,6 +124,7 @@ class Submission(object):
          fileNode = self.make_file_node(pathname, an_analysis)
          self.add_md5s(filename, fileNode, analysis_dir)
          self.add_fastq_metadata(filename, fileNode)
+        self.add_label(file_type, fileNode, libNode)
          self.model.add_statement(
              RDF.Statement(fileNode,
                            rdfNS['type'],
@@ -182,6 +186,23 @@ class Submission(object):
              if value is not None:
                  s = RDF.Statement(fileNode, model_term, toTypedNode(value))
                  self.model.append(s)
+                
+    def add_label(self, file_type, file_node, lib_node):
+        """Add rdfs:label to a file node
+        """
+        # Predicate used to look up the label template for this file type.
+        template_term = libraryOntology['label_template']
+        label_template = self.model.get_target(file_type, template_term)
+        if label_template:
+            template = loader.get_template('submission_view_rdfs_label_metadata.sparql')
+            context = Context({
+                'library': str(lib_node.uri),
+                })
+            for r in self.execute_query(template, context):
+                context = Context(r)
+                label = Template(label_template).render(context)
+                s = RDF.Statement(file_node, rdfsNS['label'], unicode(label))
+                self.model.append(s)
  
      def _add_library_details_to_model(self, libNode):
          # attributes that can have multiple values
diff --git a/htsworkflow/submission/trackhub_submission.py b/htsworkflow/submission/trackhub_submission.py

index 5003fc0fa27772bb542f4d40e15e11a69a97c5db..7a83e8770ee39515ffe90fa223bce3ea19767e47 100644 (file)
--- a/htsworkflow/submission/trackhub_submission.py
+++ b/htsworkflow/submission/trackhub_submission.py
@@ -105,12 +105,17 @@ class TrackHubSubmission(Submission):
  
              track_subgroup = self.make_track_subgroups(subgroups, track)
  
+            if 'file_label' in track:
+                track_label = self.sanitize_name(track['file_label'])
+            else:
+                track_label = track_name
+
              newtrack = Track(
                  name=track_name,
                  tracktype = str(track['file_type']),
                  url= hub_url + str(track['relative_path']),
                  short_label=str(track['library_id']),
-                long_label=track_name,
+                long_label=str(track_label),
                  subgroups=track_subgroup,
                  )
              view.add_tracks([newtrack])
@@ -155,12 +160,11 @@ class TrackHubSubmission(Submission):
          return str(template.render(context))
  
      def make_track_name(self, track):
-        name = '{}_{}_{}'.format(
+        return '{}_{}_{}'.format(
              track['library_id'],
              track['replicate'],
              track['output_type'],
          )
-        return name
  
      def make_track_subgroups(self, subgroups, track):
          track_subgroups = {}
@@ -257,7 +261,6 @@ class TrackHubSubmission(Submission):
          return name
  
      def get_manifest_metadata(self, analysis_node):
-
          query_template = loader.get_template('trackhub_manifest.sparql')
  
          context = Context({
diff --git a/htsworkflow/templates/manifest.txt b/htsworkflow/templates/manifest.txt

index adf055457f53d68f9ead81906614cd4415641c53..c81259cb77949fdffccb321f59e93634fc5e1540 100644 (file)
--- a/htsworkflow/templates/manifest.txt
+++ b/htsworkflow/templates/manifest.txt
@@ -1,2 +1,2 @@
-#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db{% for r in files %}
-{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }}{% endfor %}
+#file_name     format  output_type     experiment      replicate       enriched_in     ucsc_db replaces        replace_reason{% for r in files %}
+{{ r.relative_path }}  {{ r.file_format }}     {{ r.output_type }}     {{ r.dataset_id }}      {{ r.replicate }}       {{ r.enriched_in }}     {{ r.ucsc_db }} {{ r.replaces_accession|default_if_none:"" }}   {{ r.replace_reason|default_if_none:"" }}{% endfor %}
diff --git a/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql b/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql

new file mode 100644 (file)

index 0000000..0666e62
--- /dev/null
+++ b/htsworkflow/templates/submission_view_rdfs_label_metadata.sparql
@@ -0,0 +1,10 @@
+PREFIX htsw: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
+
+select ?cell_line ?assay ?protocol ?lab
+where {
+    optional { <{{ library }}> htsw:cell_line ?cell_line . }
+    optional { <{{ library }}> encode3:assay ?assay . }
+    optional { <{{ library }}> encode3:protocol ?protocol. }
+    optional { <{{ library }}> encode3:lab ?lab. }
+}
diff --git a/htsworkflow/templates/trackhub_manifest.sparql b/htsworkflow/templates/trackhub_manifest.sparql

index 1c83b4760b2b416553c91ebbc9b29f64bec38943..cb5c4fd5195b87302ddd6236f19ea2b16339f8cf 100644 (file)
--- a/htsworkflow/templates/trackhub_manifest.sparql
+++ b/htsworkflow/templates/trackhub_manifest.sparql
@@ -6,7 +6,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
  PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
  
-select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db
+select distinct ?name ?filename ?relative_path ?file_format ?output_type ?dataset_id ?replicate ?enriched_in ?ucsc_db ?replaces_accession ?replace_reason
  WHERE {
    <{{submission}}> a submissionOntology:submission ;
                     submissionOntology:name ?name ;
@@ -17,6 +17,10 @@ WHERE {
          htswlib:library ?library ;
          a ?fileClass .
  
+  OPTIONAL { ?file encode3:replaces ?replaces_accession ;
+                   encode3:replace_reason ?replace_reason .
+  }
+
    ?fileClass geoSoft:fileTypeLabel ?file_format ;
               ucscDaf:output_type ?output_type .
    
diff --git a/htsworkflow/templates/trackhub_samples.sparql b/htsworkflow/templates/trackhub_samples.sparql

index 19ce7e1d5d7f32d35b88e961c867095104dbde88..95152747b43a5ed4109e8dd8fb020edb120bfc80 100644 (file)
--- a/htsworkflow/templates/trackhub_samples.sparql
+++ b/htsworkflow/templates/trackhub_samples.sparql
@@ -1,3 +1,5 @@
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
  PREFIX htswlib: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
  PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
  PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
@@ -6,8 +8,7 @@ PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
  PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
  PREFIX encode3: <http://jumpgate.caltech.edu/wiki/Encode3#>
  
-select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol 
-
+select distinct ?lab_library_id ?library_id ?filename ?relative_path ?output_type ?file_type ?cell ?replicate ?assay ?rna_type ?protocol ?file_label
  WHERE {
    ?trackType geoSoft:fileTypeLabel ?file_type ;
               ucscDaf:output_type ?output_type .
@@ -15,6 +16,7 @@ WHERE {
          ucscDaf:relative_path ?relative_path ;
          htswlib:library ?library ;
          a ?trackType .
+  OPTIONAL { ?file rdfs:label ?file_label . }
    OPTIONAL { ?library htswlib:library_id ?lab_library_id }
    OPTIONAL { ?library encode3:library_id ?library_id }
    OPTIONAL { ?library htswlib:cell_line ?cell . }
diff --git a/htsworkflow/util/hashfile.py b/htsworkflow/util/hashfile.py

index 2900e76ae3727f0b2182376f666cda4deaf85bd3..af3db764971623b9c3580684ddef2ea90b63ee0c 100644 (file)
--- a/htsworkflow/util/hashfile.py
+++ b/htsworkflow/util/hashfile.py
@@ -40,6 +40,8 @@ def make_md5sum_unix(filename, md5_cache):
  
  def parse_md5sum_line(lines, filename):
      md5sum, md5sum_filename = lines[0].split()
+    md5sum_filename = os.path.normpath(md5sum_filename)
+    filename = os.path.normpath(filename)
      if md5sum_filename != filename:
          errmsg = "MD5sum and I disagre about filename. {0} != {1}"
          logger.error(errmsg.format(filename, md5sum_filename))
diff --git a/htsworkflow/util/test/test_ucsc.py b/htsworkflow/util/test/test_ucsc.py

index 05a64ba994e8311a9a9482864778865d67195035..2b2e9763cb69cc61f43a93ca39bd73e25afa36a3 100644 (file)
--- a/htsworkflow/util/test/test_ucsc.py
+++ b/htsworkflow/util/test/test_ucsc.py
@@ -17,7 +17,7 @@ class TestUCSC(TestCase):
          self.assertEqual(info.version, 4)
          self.assertEqual(info.isCompressed, True)
          # what should i do for byteswapped arch?
-        self.assertEqual(info.isSwapped, True)
+        self.assertEqual(info.isSwapped, False)
          self.assertEqual(info.primaryDataSize, 48)
          self.assertEqual(info.primaryIndexSize, 6204)
          self.assertEqual(info.zoomLevels, 2)
author	Diane Trout <diane@ghic.org>
	Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
committer	Diane Trout <diane@ghic.org>
	Mon, 21 Oct 2013 20:59:29 +0000 (13:59 -0700)
encode_submission/encode3.py		patch \| blob \| history
htsworkflow/submission/submission.py		patch \| blob \| history
htsworkflow/submission/trackhub_submission.py		patch \| blob \| history
htsworkflow/templates/manifest.txt		patch \| blob \| history
htsworkflow/templates/submission_view_rdfs_label_metadata.sparql	[new file with mode: 0644]	patch \| blob
htsworkflow/templates/trackhub_manifest.sparql		patch \| blob \| history
htsworkflow/templates/trackhub_samples.sparql		patch \| blob \| history
htsworkflow/util/hashfile.py		patch \| blob \| history
htsworkflow/util/test/test_ucsc.py		patch \| blob \| history