include RELEASE-VERSION
include version.py
+include htsworkflow/util/schemas/*.turtle
from htsworkflow.submission import daf, ucsc
from htsworkflow.util import api
+from htsworkflow.util.rdfns import *
from htsworkflow.util.rdfhelp import \
- dafTermOntology, \
- dublinCoreNS, \
get_model, \
get_serializer, \
sparql_query, \
submissionOntology, \
libraryOntology, \
- load_into_model, \
- rdfNS, \
- rdfsNS, \
- xsdNS
+ load_into_model
TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']
results.make_tree_from(opts.make_tree_from)
if opts.fastq:
- extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
+ flowcells = os.path.join(opts.sequence, 'flowcells')
+ extractor = CondorFastqExtract(opts.host, flowcells,
+ model=opts.model,
force=opts.force)
extractor.create_scripts(results)
if opts.scan_submission:
+ if opts.name is None:
+ parser.error("Please define a submission name")
mapper.scan_submission_dirs(results)
if opts.make_soft:
from pprint import pprint
from htsworkflow.util.rdfhelp import \
- dafTermOntology, \
- dublinCoreNS, \
get_model, \
get_serializer, \
sparql_query, \
- submissionOntology, \
libraryOntology, \
- load_into_model, \
- rdfNS, \
- rdfsNS, \
- xsdNS
+ load_into_model
+from htsworkflow.util.rdfns import *
TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']
mapper.link_daf(results)
if opts.fastq:
- extractor = CondorFastqExtract(opts.host, apidata, opts.sequence,
+ flowcells = os.path.join(opts.sequence, 'flowcells')
+ extractor = CondorFastqExtract(opts.host, flowcells,
force=opts.force)
extractor.create_scripts(results)
import os
import types
import re
+import sys
+from urlparse import urljoin, urlparse
+
+import RDF
+from htsworkflow.util.rdfhelp import libraryOntology as libNS
+from htsworkflow.util.rdfhelp import toTypedNode, fromTypedNode, rdfNS, \
+ stripNamespace, dump_model, simplify_uri
LOGGER = logging.getLogger(__name__)
# information and thus are unique so we don't have to do anything
return os.path.join(root, basename)
- def save(self, cursor):
+ def save_to_sql(self, cursor):
"""
Add this entry to a DB2.0 database.
"""
return cursor.execute(sql, sql_values)
+    def save_to_model(self, model, base_url=None):
+        """Serialize this sequence file into an RDF model.
+
+        Adds one libNS:illumina_result node (a file:// URI built from
+        self.path) with literal properties for flowcell_id, lane_number,
+        read, library_id, multiplex_index, split_id, cycle, passed_filter
+        and a file_type resource.  When base_url is given, also links the
+        node to <base>/flowcell/<id>/ and (if known) <base>/library/<id>.
+        """
+        # local helpers: add a typed-literal statement (skipping None)
+        # vs. a plain resource statement
+        def add_lit(model, s, p, o):
+            if o is not None:
+                model.add_statement(RDF.Statement(s, p, toTypedNode(o)))
+        def add(model, s, p, o):
+            model.add_statement(RDF.Statement(s,p,o))
+        # a bit unreliable... assumes filesystem is encoded in utf-8
+        path = os.path.abspath(self.path.encode('utf-8'))
+        fileNode = RDF.Node(RDF.Uri('file://' + path))
+        add(model, fileNode, rdfNS['type'], libNS['illumina_result'])
+        add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
+        add_lit(model, fileNode, libNS['lane_number'], self.lane)
+        if self.read is not None:
+            add_lit(model, fileNode, libNS['read'], self.read)
+        else:
+            # store empty string rather than omitting the property so
+            # load_from_model can distinguish "no read" from "not stored"
+            add_lit(model, fileNode, libNS['read'], '')
+
+        add_lit(model, fileNode, libNS['library_id'], self.project)
+        add_lit(model, fileNode, libNS['multiplex_index'], self.index)
+        add_lit(model, fileNode, libNS['split_id'], self.split)
+        add_lit(model, fileNode, libNS['cycle'], self.cycle)
+        add_lit(model, fileNode, libNS['passed_filter'], self.pf)
+        add(model, fileNode, libNS['file_type'], libNS[self.filetype])
+
+        if base_url is not None:
+            # link to the flowcell resource (trailing slash is part of
+            # the site's flowcell URL convention)
+            flowcell = RDF.Node(RDF.Uri("{base}/flowcell/{flowcell}/".format(
+                base=base_url,
+                flowcell=self.flowcell)))
+            add(model, fileNode, libNS['flowcell'], flowcell)
+            if self.project is not None:
+                library = RDF.Node(RDF.Uri("{base}/library/{library}".format(
+                    base=base_url,
+                    library=self.project)))
+                add(model, fileNode, libNS['library'], library)
+
+
+    @classmethod
+    def load_from_model(cls, model, seq_id):
+        """Reconstruct a SequenceFile from statements in an RDF model.
+
+        seq_id may be an RDF.Node or a URI string identifying a
+        libNS:illumina_result node written by save_to_model.
+        Raises KeyError when seq_id is not an illumina_result.
+        """
+        # collect all objects for (s, p); literals are converted to
+        # Python values, resources are kept as nodes
+        def get(s, p):
+            values = []
+            stmts = model.find_statements(RDF.Statement(s, p, None))
+            for s in stmts:
+                obj = s.object
+                if not obj.is_resource():
+                    values.append(fromTypedNode(obj))
+                else:
+                    values.append(obj)
+            return values
+        # single-valued accessor: None when absent, error on duplicates
+        def get_one(s, p):
+            values = get(s, p)
+            if len(values) > 1:
+                # NOTE(review): message typo — "To many" should read "Too many"
+                errmsg = u"To many values for %s %s"
+                raise ValueError(errmsg % (unicode(s), unicode(p)))
+            elif len(values) == 1:
+                return values[0]
+            else:
+                return None
+
+        if not isinstance(seq_id, RDF.Node):
+            seq_id = RDF.Node(RDF.Uri(seq_id))
+        result_statement = RDF.Statement(seq_id,
+                                         rdfNS['type'],
+                                         libNS['illumina_result'])
+        if not model.contains_statement(result_statement):
+            raise KeyError(u"%s not found" % (unicode(seq_id),))
+
+        seq_type_node = model.get_target(seq_id, libNS['file_type'])
+        seq_type = stripNamespace(libNS, seq_type_node)
+
+        # the node URI is file://<path>; recover the filesystem path
+        path = urlparse(str(seq_id.uri)).path
+        # NOTE(review): flowcellNode is retrieved but never used below —
+        # confirm whether it should be stored on the returned object
+        flowcellNode = get_one(seq_id, libNS['flowcell'])
+        flowcell = get_one(seq_id, libNS['flowcell_id'])
+        lane = get_one(seq_id, libNS['lane_number'])
+        read = get_one(seq_id, libNS['read'])
+
+        obj = cls(seq_type, path, flowcell, lane)
+        # empty string is the save_to_model marker for "no read"
+        obj.read = read if read != '' else None
+        obj.project = get_one(seq_id, libNS['library_id'])
+        obj.index = get_one(seq_id, libNS['multiplex_index'])
+        obj.split = get_one(seq_id, libNS['split_id'])
+        obj.cycle = get_one(seq_id, libNS['cycle'] )
+        obj.pf = get_one(seq_id, libNS['passed_filter'])
+        obj.libraryNode = get_one(seq_id, libNS['library'])
+        return obj
+
+
def get_flowcell_cycle(path):
"""
Extract flowcell, cycle from pathname
LOGGER.debug("Found sequence at %s" % (f,))
return sequences
+
+
+def update_model_sequence_library(model, base_url):
+    """Find sequence objects and add flowcell/library links if they're missing.
+
+    For every libNS:illumina_result node lacking a libNS:flowcell or
+    libNS:library statement, derive the linked resource from its
+    flowcell_id / library_id literals (or, failing that, guess the library
+    from the flowcell's lane records) and add the statements to the model.
+    """
+    file_body = """
+    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    select ?filenode ?flowcell_id ?lane_id ?library_id ?flowcell ?library
+    where {
+        ?filenode a libNS:illumina_result ;
+                  libNS:flowcell_id ?flowcell_id ;
+                  libNS:lane_number ?lane_id .
+        OPTIONAL { ?filenode libNS:flowcell ?flowcell . }
+        OPTIONAL { ?filenode libNS:library ?library .}
+        OPTIONAL { ?filenode libNS:library_id ?library_id .}
+    }
+    """
+    LOGGER.debug("update_model_sequence_library query %s", file_body)
+    file_query = RDF.SPARQLQuery(file_body)
+    files = file_query.execute(model)
+
+    # namespaces rooted at the site's library/ and flowcell/ URL prefixes
+    libraryNS = RDF.NS(urljoin(base_url, 'library/'))
+    flowcellNS = RDF.NS(urljoin(base_url, 'flowcell/'))
+    for f in files:
+        filenode = f['filenode']
+        LOGGER.debug("Updating file node %s", str(filenode))
+        lane_id = fromTypedNode(f['lane_id'])
+        if f['flowcell'] is None:
+            # no flowcell link yet: synthesize one from the flowcell_id literal
+            flowcell = flowcellNS[str(f['flowcell_id'])+'/']
+            LOGGER.debug("Adding file (%s) to flowcell (%s) link",
+                         str(filenode),
+                         str(flowcell))
+            model.add_statement(
+                RDF.Statement(filenode, libNS['flowcell'], flowcell))
+        else:
+            flowcell = f['flowcell']
+
+        if f['library'] is None:
+            if f['library_id'] is not None:
+                library = libraryNS[str(f['library_id']) + '/']
+            else:
+                library = guess_library_from_model(model, base_url,
+                                                   flowcell,
+                                                   lane_id)
+                # NOTE(review): guess_library_from_model may return None,
+                # in which case simplify_uri(None) here presumably raises —
+                # confirm against rdfhelp.simplify_uri
+                library_id = toTypedNode(simplify_uri(library))
+                LOGGER.debug("Adding file (%s) to library (%s) link",
+                             str(filenode),
+                             str(library))
+                model.add_statement(
+                    RDF.Statement(filenode, libNS['library_id'], library_id))
+            if library is not None:
+                model.add_statement(
+                    RDF.Statement(filenode, libNS['library'], library))
+
+
+def guess_library_from_model(model, base_url, flowcell, lane_id):
+    """Attempt to find the library URI for a flowcell lane.
+
+    Queries the model for the lane's libNS:library; if the flowcell data
+    isn't in the model yet, loads it via RDFa and retries (up to 3 times).
+    Returns the library node, or None when the lane is ambiguous; falls
+    off the end (implicitly returning None) when all retries fail.
+    """
+    flowcellNode = RDF.Node(flowcell)
+    flowcell = str(flowcell.uri)
+    # doubled braces because this string goes through str.format below
+    lane_body = """
+    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+
+    select ?library ?lane
+    where {{
+        <{flowcell}> libNS:has_lane ?lane ;
+                     a libNS:illumina_flowcell .
+        ?lane libNS:lane_number {lane_id} ;
+              libNS:library ?library .
+    }}
+    """
+    lane_body = lane_body.format(flowcell=flowcell, lane_id=lane_id)
+    lanes = []
+    tries = 3
+    while len(lanes) == 0 and tries > 0:
+        tries -= 1
+        lane_query = RDF.SPARQLQuery(lane_body)
+        lanes = [ l for l in lane_query.execute(model)]
+        if len(lanes) > 1:
+            # CONFUSED!
+            errmsg = "Too many libraries for flowcell {flowcell} "\
+                     "lane {lane} = {count}"
+            LOGGER.error(errmsg.format(flowcell=flowcell,
+                                       lane=lane_id,
+                                       count=len(lanes)))
+            return None
+        elif len(lanes) == 1:
+            # success
+            return lanes[0]['library']
+        else:
+            # try grabbing data
+            model.load(flowcellNode.uri, name="rdfa")
+
+
import tempfile
import unittest
-from htsworkflow.pipelines import sequences
+import RDF
+from htsworkflow.pipelines import sequences
+from htsworkflow.util.rdfhelp import get_model, load_string_into_model, \
+ rdfNS, libraryOntology, dump_model, fromTypedNode
class SequenceFileTests(unittest.TestCase):
"""
self.assertEqual(f.make_target_name('/tmp'),
'/tmp/42BW9AAXX_152_s_4_1_eland_extended.txt.bz2')
- def test_sql(self):
- """
- Make sure that the quick and dirty sql interface in sequences works
- """
- import sqlite3
- db = sqlite3.connect(":memory:")
- c = db.cursor()
- sequences.create_sequence_table(c)
-
+ def _generate_sequences(self):
+ seqs = []
data = [('/root/42BW9AAXX/C1-152',
'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
('/root/42BW9AAXX/C1-152',
'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),]
for path, name in data:
- seq = sequences.parse_qseq(path, name)
- seq.save(c)
+ seqs.append(sequences.parse_qseq(path, name))
+
+ path = '/root/42BW9AAXX/C1-38/Project_12345'
+ name = '12345_AAATTT_L003_R1_001.fastq.gz'
+ pathname = os.path.join(path,name)
+ seqs.append(sequences.parse_fastq(path, name))
+ self.assertEqual(len(seqs), 5)
+ return seqs
+
+
+ def test_sql(self):
+ """
+ Make sure that the quick and dirty sql interface in sequences works
+ """
+ import sqlite3
+ db = sqlite3.connect(":memory:")
+ c = db.cursor()
+ sequences.create_sequence_table(c)
+
+ for seq in self._generate_sequences():
+ seq.save_to_sql(c)
count = c.execute("select count(*) from sequences")
row = count.fetchone()
- self.assertEqual(row[0], 4)
+ self.assertEqual(row[0], 5)
+
+    def test_basic_rdf_scan(self):
+        """Saving without base_url stores literals but no flowcell/library links"""
+        import RDF
+        model = get_model()
+
+        for seq in self._generate_sequences():
+            seq.save_to_model(model)
+
+        # all 5 generated sequences become illumina_result nodes
+        files = list(model.find_statements(
+            RDF.Statement(None,
+                          rdfNS['type'],
+                          libraryOntology['illumina_result'])))
+        self.assertEqual(len(files), 5)
+        # 4 qseq tarballs ...
+        files = list(model.find_statements(
+            RDF.Statement(None,
+                          libraryOntology['file_type'],
+                          libraryOntology['qseq'])))
+        self.assertEqual(len(files), 4)
+        # ... plus 1 demultiplexed fastq
+        files = list(model.find_statements(
+            RDF.Statement(None,
+                          libraryOntology['file_type'],
+                          libraryOntology['split_fastq'])))
+        self.assertEqual(len(files), 1)
+
+        # only the fastq carries a library_id parsed from its path
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['library_id'], None)))
+        self.assertEqual(len(files), 1)
+
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['flowcell_id'], None)))
+        self.assertEqual(len(files), 5)
+
+        # no base_url was passed, so no resource links are created
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['flowcell'], None)))
+        self.assertEqual(len(files), 0)
+
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['library'], None)))
+        self.assertEqual(len(files), 0)
+
+    def test_rdf_scan_with_url(self):
+        """Saving with base_url links files to flowcell and library URIs"""
+        import RDF
+        model = get_model()
+        base_url = 'http://localhost'
+        for seq in self._generate_sequences():
+            seq.save_to_model(model, base_url=base_url)
+        localFC = RDF.NS(base_url + '/flowcell/')
+        localLibrary = RDF.NS(base_url + '/library/')
+
+        # every sequence gets a flowcell link under base_url
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['flowcell'], None)))
+        self.assertEqual(len(files), 5)
+        for f in files:
+            self.assertEqual(f.object, localFC['42BW9AAXX/'])
+
+        # only the fastq (which knows its project id) gets a library link
+        files = list(model.find_statements(
+            RDF.Statement(None, libraryOntology['library'], None)))
+        self.assertEqual(len(files), 1)
+        self.assertEqual(files[0].object, localLibrary['12345'])
+
+    def test_rdf_fixup_library(self):
+        """update_model_sequence_library fills in missing library links"""
+        base_url = 'http://localhost'
+        localLibrary = RDF.NS(base_url + '/library/')
+
+        # NOTE(review): the flowcell_id below reads "42BW9AXX" (one 'A'
+        # short of the 42BW9AAXX used elsewhere) and lanes 1171/1172 share
+        # lane_number 3 — confirm whether these are deliberate fixtures
+        flowcellInfo = """@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+
+<{base}/flowcell/42BW9AAXX/>
+  libns:flowcell_id "42BW9AXX"@en ;
+  libns:has_lane <{base}/lane/1169>, <{base}/lane/1170>,
+                 <{base}/lane/1171>, <{base}/lane/1172> ;
+  libns:read_length 75 ;
+  a libns:illumina_flowcell .
+
+<{base}/lane/1169>
+  libns:lane_number 1 ; libns:library <{base}/library/10923/> .
+<{base}/lane/1170>
+  libns:lane_number 2 ; libns:library <{base}/library/10924/> .
+<{base}/lane/1171>
+  libns:lane_number 3 ; libns:library <{base}/library/12345/> .
+<{base}/lane/1172>
+  libns:lane_number 3 ; libns:library <{base}/library/10930/> .
+""".format(base=base_url)
+        model = get_model()
+        load_string_into_model(model, 'turtle', flowcellInfo)
+        # sequences saved WITHOUT base_url, so they start with no links
+        for seq in self._generate_sequences():
+            seq.save_to_model(model)
+        f = sequences.update_model_sequence_library(model, base_url=base_url)
+
+        libTerm = libraryOntology['library']
+        libIdTerm = libraryOntology['library_id']
+
+        # lane 1 qseq should now be linked to library 10923
+        url = 'file:///root/42BW9AAXX/C1-152/woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'
+        nodes = list(model.get_targets(RDF.Uri(url), libTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0], localLibrary['10923/'])
+        nodes = list(model.get_targets(RDF.Uri(url), libIdTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(fromTypedNode(nodes[0]), '10923')
+
+        # lane 2 qseq -> library 10924
+        url = 'file:///root/42BW9AAXX/C1-152/woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'
+        nodes = list(model.get_targets(RDF.Uri(url), libTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0], localLibrary['10924/'])
+        nodes = list(model.get_targets(RDF.Uri(url), libIdTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(fromTypedNode(nodes[0]), '10924')
+
+        # the fastq already knows library_id 12345 from its pathname
+        url = 'file:///root/42BW9AAXX/C1-38/Project_12345/12345_AAATTT_L003_R1_001.fastq.gz'
+        nodes = list(model.get_targets(RDF.Uri(url), libTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0], localLibrary['12345/'])
+        nodes = list(model.get_targets(RDF.Uri(url), libIdTerm))
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(fromTypedNode(nodes[0]), '12345')
+
+    def test_load_from_model(self):
+        """Can we round trip a SequenceFile through an RDF model"""
+        model = get_model()
+        path = '/root/42BW9AAXX/C1-38/Project_12345/'
+        filename = '12345_AAATTT_L003_R1_001.fastq.gz'
+        seq = sequences.parse_fastq(path, filename)
+        seq.save_to_model(model)
+
+        # manually attach a library resource so libraryNode round-trips too
+        seq_id = 'file://'+path+filename
+        seqNode = RDF.Node(RDF.Uri(seq_id))
+        libNode = RDF.Node(RDF.Uri('http://localhost/library/12345'))
+        model.add_statement(
+            RDF.Statement(seqNode, libraryOntology['library'], libNode))
+        seq2 = sequences.SequenceFile.load_from_model(model, seq_id)
+
+        # every parsed attribute must survive the round trip
+        self.assertEqual(seq.flowcell, seq2.flowcell)
+        self.assertEqual(seq.flowcell, '42BW9AAXX')
+        self.assertEqual(seq.filetype, seq2.filetype)
+        self.assertEqual(seq2.filetype, 'split_fastq')
+        self.assertEqual(seq.lane, seq2.lane)
+        self.assertEqual(seq2.lane, 3)
+        self.assertEqual(seq.read, seq2.read)
+        self.assertEqual(seq2.read, 1)
+        self.assertEqual(seq.project, seq2.project)
+        self.assertEqual(seq2.project, '12345')
+        self.assertEqual(seq.index, seq2.index)
+        self.assertEqual(seq2.index, 'AAATTT')
+        self.assertEqual(seq.split, seq2.split)
+        self.assertEqual(seq2.split, '001')
+        self.assertEqual(seq.cycle, seq2.cycle)
+        self.assertEqual(seq.pf, seq2.pf)
+        self.assertEqual(seq2.libraryNode, libNode)
+        self.assertEqual(seq.path, seq2.path)
def test_scan_for_sequences(self):
# simulate tree
"""
import logging
import os
-from pprint import pformat
+from pprint import pformat,pprint
import sys
import types
+from urlparse import urljoin, urlparse
-from htsworkflow.pipelines.sequences import scan_for_sequences
+from htsworkflow.pipelines.sequences import scan_for_sequences, \
+ update_model_sequence_library
from htsworkflow.pipelines.samplekey import SampleKey
from htsworkflow.pipelines import qseq2fastq
from htsworkflow.pipelines import srf2fastq
from htsworkflow.pipelines import desplit_fastq
-from htsworkflow.util.api import HtswApi
+from htsworkflow.util.rdfhelp import get_model, dump_model, load_into_model, \
+ fromTypedNode, \
+ stripNamespace
+from htsworkflow.util.rdfns import *
from htsworkflow.util.conversion import parse_flowcell_id
from django.conf import settings
from django.template import Context, loader
+import RDF
+
LOGGER = logging.getLogger(__name__)
class CondorFastqExtract(object):
- def __init__(self, host, apidata, sequences_path,
+ def __init__(self, host, sequences_path,
log_path='log',
+ model=None,
force=False):
"""Extract fastqs from results archive
log_path (str): where to put condor log files
force (bool): do we force overwriting current files?
"""
- self.api = HtswApi(host, apidata)
+ self.host = host
+ self.model = get_model(model)
self.sequences_path = sequences_path
self.log_path = log_path
self.force = force
+ LOGGER.info("CondorFastq host={0}".format(self.host))
+ LOGGER.info("CondorFastq sequences_path={0}".format(self.sequences_path))
+ LOGGER.info("CondorFastq log_path={0}".format(self.log_path))
def create_scripts(self, result_map ):
"""
template_map = {'srf': 'srf.condor',
'qseq': 'qseq.condor',
'split_fastq': 'split_fastq.condor',
- 'by_sample': 'lane_to_fastq.turtle',
}
env = None
'logdir': self.log_path,
'env': env,
'args': condor_entries[script_type],
- 'root_url': self.api.root_url,
+ 'root_url': self.host,
}
context = Context(variables)
'qseq': self.condor_qseq_to_fastq,
'split_fastq': self.condor_desplit_fastq
}
- by_sample = {}
- lib_db = self.find_archive_sequence_files(result_map)
- needed_targets = self.find_missing_targets(result_map, lib_db)
+ sequences = self.find_archive_sequence_files(result_map)
+ needed_targets = self.update_fastq_targets(result_map, sequences)
for target_pathname, available_sources in needed_targets.items():
LOGGER.debug(' target : %s' % (target_pathname,))
if sources is not None:
condor_entries.setdefault(condor_type, []).append(
conversion(sources, target_pathname))
- for s in sources:
- by_sample.setdefault(s.lane_id,[]).append(
- target_pathname)
else:
print " need file", target_pathname
- condor_entries['by_sample'] = by_sample
return condor_entries
def find_archive_sequence_files(self, result_map):
"""
Find archived sequence files associated with our results.
"""
- LOGGER.debug("Searching for sequence files in: %s" %(self.sequences_path,))
-
- lib_db = {}
- seq_dirs = set()
- candidate_lanes = {}
- for lib_id in result_map.keys():
- lib_info = self.api.get_library(lib_id)
- lib_info['lanes'] = {}
- lib_db[lib_id] = lib_info
-
- for lane in lib_info['lane_set']:
- lane_key = (lane['flowcell'], lane['lane_number'])
- candidate_lanes[lane_key] = (lib_id, lane['lane_id'])
- seq_dirs.add(os.path.join(self.sequences_path,
- 'flowcells',
- lane['flowcell']))
- LOGGER.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
- candidate_seq_list = scan_for_sequences(seq_dirs)
-
- # at this point we have too many sequences as scan_for_sequences
- # returns all the sequences in a flowcell directory
- # so lets filter out the extras
-
- for seq in candidate_seq_list:
- lane_key = (seq.flowcell, seq.lane)
- candidate_key = candidate_lanes.get(lane_key, None)
- if candidate_key is not None:
- lib_id, lane_id = candidate_key
- seq.lane_id = lane_id
- lib_info = lib_db[lib_id]
- lib_info['lanes'].setdefault(lane_key, set()).add(seq)
-
- return lib_db
-
- def find_missing_targets(self, result_map, lib_db):
+ self.import_libraries(result_map)
+ flowcell_ids = self.find_relavant_flowcell_ids()
+ self.import_sequences(flowcell_ids)
+
+ query_text = """
+ prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+
+ select ?filenode ?filetype ?cycle ?lane_number ?read
+ ?library ?library_id
+ ?flowcell ?flowcell_id ?read_length
+ ?flowcell_type ?flowcell_status
+ where {
+ ?filenode libns:cycle ?cycle ;
+ libns:lane_number ?lane_number ;
+ libns:read ?read ;
+ libns:flowcell ?flowcell ;
+ libns:flowcell_id ?flowcell_id ;
+ libns:library ?library ;
+ libns:library_id ?library_id ;
+ libns:file_type ?filetype ;
+ a libns:illumina_result .
+ ?flowcell libns:read_length ?read_length ;
+ libns:flowcell_type ?flowcell_type .
+ OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status }
+ FILTER(?filetype != libns:sequencer_result)
+ }
"""
- Check if the sequence file exists.
- This requires computing what the sequence name is and checking
- to see if it can be found in the sequence location.
+ LOGGER.debug("find_archive_sequence_files query: %s",
+ query_text)
+ query = RDF.SPARQLQuery(query_text)
+ results = []
+ for r in query.execute(self.model):
+ library_id = fromTypedNode(r['library_id'])
+ if library_id in result_map:
+ seq = SequenceResult(r)
+ LOGGER.debug("Creating sequence result for library %s: %s",
+ library_id,
+ repr(seq))
+ results.append(seq)
+ return results
+
+    def import_libraries(self, result_map):
+        """Load RDF data for every library listed in result_map.
+
+        Builds each library's URL under self.host and delegates to
+        import_library for the conditional RDFa fetch.
+        """
+        for lib_id in result_map.keys():
+            lib_id_encoded = lib_id.encode('utf-8')
+            liburl = urljoin(self.host, 'library/%s/' % (lib_id_encoded,))
+            library = RDF.Node(RDF.Uri(liburl))
+            self.import_library(library)
- Adds seq.paired flag to sequences listed in lib_db[*]['lanes']
+    def import_library(self, library):
+        """Import library data into our model if we don't have it already
+        """
+        q = RDF.Statement(library, rdfNS['type'], libraryOntology['library'])
+        # NOTE(review): 'present' is set True when the library was ABSENT
+        # and we fetched it — the name reads backwards; the debug line
+        # below effectively logs "did we fetch it"
+        present = False
+        if not self.model.contains_statement(q):
+            present = True
+            load_into_model(self.model, 'rdfa', library)
+        LOGGER.debug("Did we import %s: %s", library, present)
+
+    def find_relavant_flowcell_ids(self):
+        """Generate set of flowcell ids that had samples of interest on them
+        """
+        flowcell_query =RDF.SPARQLQuery("""
+prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+
+select distinct ?flowcell ?flowcell_id
+WHERE {
+  ?library a libns:library ;
+           libns:has_lane ?lane .
+  ?lane libns:flowcell ?flowcell .
+  ?flowcell libns:flowcell_id ?flowcell_id .
+}""")
+        flowcell_ids = set()
+        for r in flowcell_query.execute(self.model):
+            flowcell_ids.add( fromTypedNode(r['flowcell_id']) )
+        LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids)))
+        # NOTE(review): the statements below sit OUTSIDE the for-loop, so
+        # only the LAST query result's flowcell is checked/loaded, and 'r'
+        # is undefined (NameError) when the query returns no rows —
+        # confirm whether this block was meant to be inside the loop
+        flowcell_test = RDF.Statement(r['flowcell'],
+                                      rdfNS['type'],
+                                      libraryOntology['illumina_flowcell'])
+        if not self.model.contains_statement(flowcell_test):
+            # we probably lack full information about the flowcell.
+            load_into_model(self.model, 'rdfa', r['flowcell'])
+        return flowcell_ids
+
+    def import_sequences(self, flowcell_ids):
+        """Scan the per-flowcell sequence directories into the RDF model.
+
+        Each found sequence is saved with self.host as base_url, then
+        update_model_sequence_library back-fills missing library links.
+        """
+        seq_dirs = []
+        for f in flowcell_ids:
+            seq_dirs.append(os.path.join(self.sequences_path, str(f)))
+        sequences = scan_for_sequences(seq_dirs)
+        for seq in sequences:
+            seq.save_to_model(self.model, self.host)
+        update_model_sequence_library(self.model, self.host)
+
+ def update_fastq_targets(self, result_map, raw_files):
+ """Return list of fastq files that need to be built.
+
+ Also update model with link between illumina result files
+ and our target fastq file.
"""
fastq_paired_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s_r%(read)s.fastq'
fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
# find what targets we're missing
needed_targets = {}
- for lib_id in result_map.keys():
- result_dir = result_map[lib_id]
- lib = lib_db[lib_id]
- lane_dict = make_lane_dict(lib_db, lib_id)
-
- for lane_key, sequences in lib['lanes'].items():
- for seq in sequences:
- seq.paired = lane_dict[seq.flowcell]['paired_end']
- lane_status = lane_dict[seq.flowcell]['status']
-
- if seq.paired and seq.read is None:
- seq.read = 1
- filename_attributes = {
- 'flowcell': seq.flowcell,
- 'lib_id': lib_id,
- 'lane': seq.lane,
- 'read': seq.read,
- 'cycle': seq.cycle
- }
- # skip bad runs
- if lane_status == 'Failed':
- continue
- if seq.flowcell == '30DY0AAXX':
- # 30DY0 only ran for 151 bases instead of 152
- # it is actually 76 1st read, 75 2nd read
- seq.mid_point = 76
-
- # end filters
- if seq.paired:
- target_name = fastq_paired_template % \
- filename_attributes
- else:
- target_name = fastq_single_template % \
- filename_attributes
-
- target_pathname = os.path.join(result_dir, target_name)
- if self.force or not os.path.exists(target_pathname):
- t = needed_targets.setdefault(target_pathname, {})
- t.setdefault(seq.filetype, []).append(seq)
-
+ for seq in raw_files:
+ if not seq.isgood:
+ continue
+ filename_attributes = {
+ 'flowcell': seq.flowcell_id,
+ 'lib_id': seq.library_id,
+ 'lane': seq.lane_number,
+ 'read': seq.read,
+ 'cycle': seq.cycle
+ }
+
+ if seq.ispaired:
+ target_name = fastq_paired_template % \
+ filename_attributes
+ else:
+ target_name = fastq_single_template % \
+ filename_attributes
+
+ result_dir = result_map[seq.library_id]
+ target_pathname = os.path.join(result_dir, target_name)
+ if self.force or not os.path.exists(target_pathname):
+ t = needed_targets.setdefault(target_pathname, {})
+ t.setdefault(seq.filetype, []).append(seq)
+ self.add_target_source_links(target_pathname, seq)
return needed_targets
+    def add_target_source_links(self, target, seq):
+        """Add link between target pathname and the 'lane' that produced it
+        (note lane objects are now post demultiplexing.)
+        """
+        # record provenance: target fastq dc:source <- raw sequence file node
+        target_uri = 'file://' + target
+        target_node = RDF.Node(RDF.Uri(target_uri))
+        source_stmt = RDF.Statement(target_node, dcNS['source'], seq.filenode)
+        self.model.add_statement(source_stmt)
def condor_srf_to_fastq(self, sources, target_pathname):
if len(sources) > 1:
raise ValueError("srf to fastq can only handle one file")
+ mid_point = None
+ if sources[0].flowcell_id == '30DY0AAXX':
+ mid_point = 76
+
return {
- 'sources': [os.path.abspath(sources[0].path)],
+ 'sources': [sources[0].path],
'pyscript': srf2fastq.__file__,
- 'flowcell': sources[0].flowcell,
- 'ispaired': sources[0].paired,
+ 'flowcell': sources[0].flowcell_id,
+ 'ispaired': sources[0].ispaired,
'target': target_pathname,
'target_right': target_pathname.replace('_r1.fastq', '_r2.fastq'),
- 'mid': getattr(sources[0], 'mid_point', None),
+ 'mid': mid_point,
'force': self.force,
}
paths.sort()
return {
'pyscript': qseq2fastq.__file__,
- 'flowcell': sources[0].flowcell,
+ 'flowcell': sources[0].flowcell_id,
'target': target_pathname,
'sources': paths,
- 'ispaired': sources[0].paired,
+ 'ispaired': sources[0].ispaired,
'istar': len(sources) == 1,
}
'pyscript': desplit_fastq.__file__,
'target': target_pathname,
'sources': paths,
- 'ispaired': sources[0].paired,
+ 'ispaired': sources[0].ispaired,
}
- def lane_rdf(self, sources, target_pathname):
- pass
def make_lane_dict(lib_db, lib_id):
"""
result.append((lane['flowcell'], lane))
return dict(result)
+class SequenceResult(object):
+    """Convert the sparql query result from find_archive_sequence_files
+    into attribute access, with derived isgood/ispaired/filetype/path views.
+    """
+    def __init__(self, result):
+        # raw RDF nodes kept as-is; literals unwrapped via fromTypedNode
+        self.filenode = result['filenode']
+        self._filetype = result['filetype']
+        self.cycle = fromTypedNode(result['cycle'])
+        self.lane_number = fromTypedNode(result['lane_number'])
+        self.read = fromTypedNode(result['read'])
+        # NOTE(review): any string-typed read (including the '' marker for
+        # "no read") is coerced to 1 here — confirm that unpaired runs are
+        # really meant to be treated as read 1
+        if type(self.read) in types.StringTypes:
+            self.read = 1
+        self.library = result['library']
+        self.library_id = fromTypedNode(result['library_id'])
+        self.flowcell = result['flowcell']
+        self.flowcell_id = fromTypedNode(result['flowcell_id'])
+        self.flowcell_type = fromTypedNode(result['flowcell_type'])
+        self.flowcell_status = fromTypedNode(result['flowcell_status'])
+
+    def _is_good(self):
+        """is this sequence / flowcell 'good enough'"""
+        # flowcell_status is optional in the query; absent means usable
+        if self.flowcell_status is not None and \
+           self.flowcell_status.lower() == "failed":
+            return False
+        return True
+    isgood = property(_is_good)
+
+    def _get_ispaired(self):
+        # NOTE(review): raises AttributeError if flowcell_type is None;
+        # the query makes flowcell_type mandatory, so presumably safe
+        if self.flowcell_type.lower() == "paired":
+            return True
+        else:
+            return False
+    ispaired = property(_get_ispaired)
+
+    def _get_filetype(self):
+        # strip the ontology prefix, e.g. libns:qseq -> 'qseq'
+        return stripNamespace(libraryOntology, self._filetype)
+    filetype = property(_get_filetype)
+
+    def _get_path(self):
+        # filenode is a file:// URI; anything else is unsupported
+        url = urlparse(str(self.filenode.uri))
+        if url.scheme == 'file':
+            return url.path
+        else:
+            errmsg = u"Unsupported scheme {0} for {1}"
+            raise ValueError(errmsg.format(url.scheme, unicode(url)))
+    path = property(_get_path)
+
+    def __repr__(self):
+        return "SequenceResult({0},{1},{2})".format(
+            str(self.filenode),
+            str(self.library_id),
+            str(self.flowcell_id))
rdfNS['type'],
submissionOntology['submission']))
self.model.add_statement(RDF.Statement(submissionNode,
- submissionOntology['library'],
+ libraryOntology['library'],
libNode))
LOGGER.debug("Adding statements to {0}".format(str(submissionView)))
def create_file_attributes(self, filename, submissionView, submission_uri, submission_dir):
# add file specific information
LOGGER.debug("Updating file md5sum")
- fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
submission_pathname = os.path.join(submission_dir, filename)
+ fileNode = RDF.Node(RDF.Uri("file://" + submission_pathname))
self.model.add_statement(
RDF.Statement(submissionView,
dafTermOntology['has_file'],
from htsworkflow.util.rdfhelp import \
fromTypedNode, \
geoSoftNS, \
- simplifyUri, \
+ stripNamespace, \
submissionOntology
from django.conf import settings
for lib_id, result_dir in result_map.items():
an_analysis = self.get_submission_node(result_dir)
metadata = self.get_sample_metadata(an_analysis)
- if len(metadata) > 1:
+ if len(metadata) == 0:
+ errmsg = 'No metadata found for {0}'
+ LOGGER.error(errmsg.format(str(an_analysis),))
+ continue
+ elif len(metadata) > 1:
errmsg = 'Confused there are more than one samples for %s'
- LOGGER.debug(errmsg % (str(an_analysis,)))
+ LOGGER.debug(errmsg % (str(an_analysis),))
metadata = metadata[0]
metadata['raw'] = self.get_raw_files(an_analysis)
metadata['supplimental'] = self.get_sample_files(an_analysis)
def query_to_soft_dictionary(self, results, heading):
attributes = []
for r in results:
- name = simplifyUri(geoSoftNS, r['name'])
+ name = stripNamespace(geoSoftNS, r['name'])
if name is not None:
if name.lower() == heading.lower():
name = '^' + name
"""Help collect and process results for submission
"""
+from collections import MutableMapping
import os
import logging
LOGGER = logging.getLogger(__name__)
-class ResultMap(object):
+class ResultMap(MutableMapping):
"""Store list of results
"""
def __init__(self):
self.results_order = []
self.results = {}
- def keys(self):
- return self.results_order
+    def __iter__(self):
+        # iterate keys in insertion order (results_order, not the dict)
+        for item in self.results_order:
+            yield item
- def values(self):
- return ( self.results[r] for r in self.results_order )
+    def __len__(self):
+        # dict and ordering list must stay in sync
+        l = len(self.results)
+        assert l == len(self.results_order)
+        return l
- def items(self):
- return ( (r, self.results[r]) for r in self.results_order )
+    def __setitem__(self, key, value):
+        # NOTE(review): re-assigning an existing key appends a duplicate to
+        # results_order, breaking the __len__ assertion — confirm callers
+        # never overwrite keys
+        self.results_order.append(key)
+        self.results[key] = value
def __getitem__(self, key):
return self.results[key]
+    def __delitem__(self, key):
+        # remove from both the mapping and the insertion-order list
+        del self.results[key]
+        i = self.results_order.index(key)
+        del self.results_order[i]
+
+
def add_results_from_file(self, filename):
pathname = os.path.abspath(filename)
basepath, name = os.path.split(pathname)
for lib_id, lib_path in results:
if not os.path.isabs(lib_path):
lib_path = os.path.join(basepath, lib_path)
- self.add_result(lib_id, lib_path)
-
- def add_result(self, lib_id, lib_path):
- self.results_order.append(lib_id)
- self.results[lib_id] = lib_path
+ self[lib_id] = lib_path
def make_tree_from(self, source_path, destpath = None):
"""Create a tree using data files from source path.
rdfNS['type'])
if file_classification is None:
errmsg = 'Could not find class for {0}'
- logger.warning(errmsg.format(str(file_type)))
+ LOGGER.warning(errmsg.format(str(file_type)))
return
self.model.add_statement(
import tempfile
import unittest
-from htsworkflow.submission import condorfastq
+from htsworkflow.submission.condorfastq import CondorFastqExtract
from htsworkflow.submission.results import ResultMap
+from htsworkflow.util.rdfhelp import load_string_into_model, dump_model
FCDIRS = [
'C02F9ACXX',
'C02F9ACXX/C1-202/Project_11154',
'C02F9ACXX/C1-202/Project_12342_Index1',
'C02F9ACXX/C1-202/Project_12342_Index2',
+ 'C02F9ACXX/C1-202/Project_12345',
'42JUYAAXX',
'42JUYAAXX/C1-76',
'30221AAXX',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
'61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
]
-LIBDATA = {
- '11154':{u'antibody_id': None,
- u'cell_line': u'Unknown',
- u'cell_line_id': 1,
- u'experiment_type': u'RNA-seq',
- u'experiment_type_id': 4,
- u'gel_cut_size': 300,
- u'hidden': False,
- u'id': u'11154',
- u'insert_size': 200,
- u'lane_set': [{u'flowcell': u'30221AAXX',
- u'lane_number': 4,
- u'lane_id': 3400,
- u'paired_end': False,
- u'read_length': 33,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'42JUYAAXX',
- u'lane_number': 5,
- u'lane_id': 4200,
- u'paired_end': True,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'61MJTAAXX',
- u'lane_number': 6,
- u'lane_id': 6600,
- u'paired_end': False,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'30DY0AAXX',
- u'lane_number': 8,
- u'lane_id': 3800,
- u'paired_end': True,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'C02F9ACXX',
- u'lane_number': 3,
- u'lane_id': 12300,
- u'paired_end': True,
- u'read_length': 101,
- u'status': u'Unknown',
- u'status_code': None}],
- u'library_id': u'11154',
- u'library_name': u'Paired ends ASDF ',
- u'library_species': u'Mus musculus',
- u'library_species_id': 9,
- u'library_type': u'Paired End (non-multiplexed)',
- u'library_type_id': 2,
- u'made_by': u'Gary Gygax',
- u'made_for': u'TSR',
- u'notes': u'300 bp gel fragment',
- u'replicate': 1,
- u'stopping_point': u'1Aa',
- u'successful_pM': None,
- u'undiluted_concentration': u'29.7'}
- }
-
-FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'}
-
-class FakeApi(object):
- def __init__(self, *args, **kwargs):
- self.root_url = 'http://localhost'
-
- def get_library(self, libid):
- lib_data = LIBDATA[libid]
- return copy.deepcopy(lib_data)
-
-
+lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
+
+<http://localhost/flowcell/30221AAXX/>
+ a libns:illumina_flowcell ;
+ libns:read_length 33 ;
+ libns:flowcell_type "Single"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/3401> ;
+ libns:has_lane <http://localhost/lane/3402> ;
+ libns:has_lane <http://localhost/lane/3403> ;
+ libns:has_lane <http://localhost/lane/3404> ;
+ libns:has_lane <http://localhost/lane/3405> ;
+ libns:has_lane <http://localhost/lane/3406> ;
+ libns:has_lane <http://localhost/lane/3407> ;
+ libns:has_lane <http://localhost/lane/3408> ;
+ libns:flowcell_id "30221AAXX"@en .
+
+<http://localhost/lane/3401>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 1 .
+<http://localhost/lane/3402>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 2 .
+<http://localhost/lane/3403>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 3 .
+<http://localhost/lane/3404>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number 4 .
+ # paired_end 1;
+ # read_length 33;
+ # status "Unknown"@en .
+<http://localhost/lane/3405>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 5 .
+<http://localhost/lane/3406>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 6 .
+<http://localhost/lane/3407>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 7 .
+<http://localhost/lane/3408>
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number 8 .
+
+<http://localhost/flowcell/42JUYAAXX/>
+ a libns:illumina_flowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/4201> ;
+ libns:has_lane <http://localhost/lane/4202> ;
+ libns:has_lane <http://localhost/lane/4203> ;
+ libns:has_lane <http://localhost/lane/4204> ;
+ libns:has_lane <http://localhost/lane/4205> ;
+ libns:has_lane <http://localhost/lane/4206> ;
+ libns:has_lane <http://localhost/lane/4207> ;
+ libns:has_lane <http://localhost/lane/4208> ;
+ libns:flowcell_id "42JUYAAXX"@en .
+
+<http://localhost/lane/4201>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 1 .
+<http://localhost/lane/4202>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 2 .
+<http://localhost/lane/4203>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 3 .
+<http://localhost/lane/4204>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 4 .
+<http://localhost/lane/4205>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number 5 .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+<http://localhost/lane/4206>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 6 .
+<http://localhost/lane/4207>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 7 .
+<http://localhost/lane/4208>
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number 8 .
+
+<http://localhost/flowcell/61MJTAAXX/>
+ a libns:illumina_flowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Single"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/6601> ;
+ libns:has_lane <http://localhost/lane/6602> ;
+ libns:has_lane <http://localhost/lane/6603> ;
+ libns:has_lane <http://localhost/lane/6604> ;
+ libns:has_lane <http://localhost/lane/6605> ;
+ libns:has_lane <http://localhost/lane/6606> ;
+ libns:has_lane <http://localhost/lane/6607> ;
+ libns:has_lane <http://localhost/lane/6608> ;
+ libns:flowcell_id "61MJTAAXX"@en .
+
+<http://localhost/lane/6601>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 1 .
+<http://localhost/lane/6602>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 2 .
+<http://localhost/lane/6603>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 3 .
+<http://localhost/lane/6604>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 4 .
+<http://localhost/lane/6605>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 5 .
+<http://localhost/lane/6606>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number 6 .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+<http://localhost/lane/6607>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 7 .
+<http://localhost/lane/6608>
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number 8 .
+
+<http://localhost/flowcell/30DY0AAXX/>
+ a libns:illumina_flowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/3801> ;
+ libns:has_lane <http://localhost/lane/3802> ;
+ libns:has_lane <http://localhost/lane/3803> ;
+ libns:has_lane <http://localhost/lane/3804> ;
+ libns:has_lane <http://localhost/lane/3805> ;
+ libns:has_lane <http://localhost/lane/3806> ;
+ libns:has_lane <http://localhost/lane/3807> ;
+ libns:has_lane <http://localhost/lane/3808> ;
+ libns:flowcell_id "30DY0AAXX"@en .
+
+<http://localhost/lane/3801>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 1 .
+<http://localhost/lane/3802>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 2 .
+<http://localhost/lane/3803>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 3 .
+<http://localhost/lane/3804>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 4 .
+<http://localhost/lane/3805>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 5 .
+<http://localhost/lane/3806>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 6 .
+<http://localhost/lane/3807>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number 7 .
+<http://localhost/lane/3808>
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number 8 .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+
+<http://localhost/flowcell/C02F9ACXX/>
+ a libns:illumina_flowcell ;
+ libns:read_length 101 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/12300> ;
+ libns:has_lane <http://localhost/lane/12500> ;
+ libns:flowcell_id "C02F9ACXX"@en .
+
+<http://localhost/lane/12300>
+ libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+ libns:library <http://localhost/library/12345/> ;
+ libns:lane_number 3 .
+ # paired_end 1;
+ # read_length 101;
+ # status "Unknown"@en .
+
+<http://localhost/lane/12500>
+ libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number 3 .
+ # paired_end 1;
+ # read_length 101;
+ # status "Unknown"@en .
+
+<http://localhost/library/11154/>
+ a libns:library ;
+ libns:affiliation "TSR"@en;
+ libns:concentration "29.7";
+ libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+ libns:experiment_type "RNA-seq"@en ;
+ libns:gel_cut 300 ;
+ libns:has_lane <http://localhost/lane/3404> ;
+ libns:has_lane <http://localhost/lane/4205> ;
+ libns:has_lane <http://localhost/lane/6606> ;
+ libns:has_lane <http://localhost/lane/3808> ;
+ libns:has_lane <http://localhost/lane/12500> ;
+ libns:insert_size 2000 ;
+ libns:library_id "11154"@en ;
+ libns:library_type "Paired End (Multiplexed)"@en ;
+ libns:made_by "Gary Gygax"@en ;
+ libns:name "Paired Ends ASDF"@en ;
+ libns:replicate "1"@en;
+ libns:species "Mus musculus"@en ;
+ libns:stopping_point "Completed"@en ;
+ libns:total_unique_locations 8841201 .
+ # cell_line
+
+
+<http://localhost/library/12345/>
+ a libns:library ;
+ libns:affiliation "TSR"@en;
+ libns:concentration "12.345";
+ libns:cell_line "Unknown"@en ;
+ libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+ libns:experiment_type "RNA-seq"@en ;
+ libns:gel_cut 300 ;
+ libns:has_lane <http://localhost/lane/12300> ;
+ libns:insert_size 2000 ;
+ libns:library_id "12345"@en ;
+ libns:library_type "Paired End (Multiplexed)"@en ;
+ libns:made_by "Gary Gygax"@en ;
+ libns:name "Paired Ends THING"@en ;
+ libns:replicate "1"@en;
+ libns:species "Mus musculus"@en ;
+ libns:stopping_point "Completed"@en ;
+ libns:total_unique_locations 8841201 .
+ # cell_line
+"""
+HOST = "http://localhost"
class TestCondorFastq(unittest.TestCase):
def setUp(self):
with open(filename, 'w') as stream:
stream.write('testfile')
- self.subname = unicode('sub-11154')
- self.subdir = os.path.join(self.tempdir, self.subname)
- os.mkdir(self.subdir)
-
self.result_map = ResultMap()
- self.result_map.add_result('11154', self.subname)
+ for lib_id in [u'11154', u'12345']:
+ subname = 'sub-%s' % (lib_id,)
+ sub_dir = os.path.join(self.tempdir, subname)
+ os.mkdir(sub_dir)
+ self.result_map[lib_id] = sub_dir
+
+ self.extract = CondorFastqExtract(HOST,
+ self.flowcelldir,
+ self.logdir)
+ load_string_into_model(self.extract.model, 'turtle', lib_turtle)
def tearDown(self):
shutil.rmtree(self.tempdir)
os.chdir(self.cwd)
+ def test_find_relavant_flowcell_ids(self):
+ expected = set(('30221AAXX',
+ '42JUYAAXX',
+ '61MJTAAXX',
+ '30DY0AAXX',
+ 'C02F9ACXX'))
+ flowcell_ids = self.extract.find_relavant_flowcell_ids()
+ self.assertEqual(flowcell_ids, expected)
+
def test_find_archive_sequence(self):
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
-
- lib_db = extract.find_archive_sequence_files(self.result_map)
-
- self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
- lanes = [
- lib_db['11154']['lanes'][(u'30221AAXX', 4)],
- lib_db['11154']['lanes'][(u'42JUYAAXX', 5)],
- lib_db['11154']['lanes'][(u'61MJTAAXX', 6)],
- lib_db['11154']['lanes'][(u'30DY0AAXX', 8)],
- lib_db['11154']['lanes'][(u'C02F9ACXX', 3)],
- ]
- self.failUnlessEqual(len(lanes[0]), 1)
- self.failUnlessEqual(len(lanes[1]), 2)
- self.failUnlessEqual(len(lanes[2]), 1)
- self.failUnlessEqual(len(lanes[3]), 1)
- self.failUnlessEqual(len(lanes[4]), 4)
+ seqs = self.extract.find_archive_sequence_files(self.result_map)
+
+ expected = set([
+ (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'),
+ (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'),
+ (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'),
+ (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+ (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'),
+ (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf')
+ ])
+ found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
+ self.assertEqual(expected, found)
def test_find_needed_targets(self):
+ lib_db = self.extract.find_archive_sequence_files(self.result_map)
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- lib_db = extract.find_archive_sequence_files(self.result_map)
-
- needed_targets = extract.find_missing_targets(self.result_map,
- lib_db)
- self.failUnlessEqual(len(needed_targets), 7)
+ needed_targets = self.extract.update_fastq_targets(self.result_map,
+ lib_db)
+ self.assertEqual(len(needed_targets), 9)
srf_30221 = needed_targets[
- self.subname + u'/11154_30221AAXX_c33_l4.fastq']
+ self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
qseq_42JUY_r1 = needed_targets[
- self.subname + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
+ self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
qseq_42JUY_r2 = needed_targets[
- self.subname + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
+ self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
qseq_61MJT = needed_targets[
- self.subname + u'/11154_61MJTAAXX_c76_l6.fastq']
+ self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
split_C02F9_r1 = needed_targets[
- self.subname + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
+ self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
split_C02F9_r2 = needed_targets[
- self.subname + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
+ self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
- self.failUnlessEqual(len(srf_30221['srf']), 1)
- self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1)
- self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1)
- self.failUnlessEqual(len(qseq_61MJT['qseq']), 1)
- self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2)
- self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2)
-
- #print '-------needed targets---------'
- #pprint(needed_targets)
+ self.assertEqual(len(srf_30221['srf']), 1)
+ self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
+ self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
+ self.assertEqual(len(qseq_61MJT['qseq']), 1)
+ self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
+ self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
def test_generate_fastqs(self):
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- commands = extract.build_condor_arguments(self.result_map)
+ commands = self.extract.build_condor_arguments(self.result_map)
srf = commands['srf']
qseq = commands['qseq']
split = commands['split_fastq']
- self.failUnlessEqual(len(srf), 2)
- self.failUnlessEqual(len(qseq), 3)
- self.failUnlessEqual(len(split), 2)
+ self.assertEqual(len(srf), 2)
+ self.assertEqual(len(qseq), 3)
+ self.assertEqual(len(split), 4)
srf_data = {
- os.path.join(self.subname, '11154_30221AAXX_c33_l4.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_30221AAXX_c33_l4.fastq'): {
'mid': None,
'ispaired': False,
'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
'flowcell': u'30221AAXX',
- 'target': os.path.join(self.subname,
+ 'target': os.path.join(self.result_map['11154'],
u'11154_30221AAXX_c33_l4.fastq'),
},
- os.path.join(self.subname, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_30DY0AAXX_c151_l8_r1.fastq'): {
'mid': None,
'ispaired': True,
'flowcell': u'30DY0AAXX',
'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
'mid': 76,
'target':
- os.path.join(self.subname,
+ os.path.join(self.result_map['11154'],
u'11154_30DY0AAXX_c151_l8_r1.fastq'),
'target_right':
- os.path.join(self.subname,
+ os.path.join(self.result_map['11154'],
u'11154_30DY0AAXX_c151_l8_r2.fastq'),
}
}
for args in srf:
expected = srf_data[args['target']]
- self.failUnlessEqual(args['ispaired'], expected['ispaired'])
- self.failUnlessEqual(len(args['sources']), 1)
+ self.assertEqual(args['ispaired'], expected['ispaired'])
+ self.assertEqual(len(args['sources']), 1)
_, source_filename = os.path.split(args['sources'][0])
- self.failUnlessEqual(source_filename, expected['sources'][0])
- self.failUnlessEqual(args['target'], expected['target'])
+ self.assertEqual(source_filename, expected['sources'][0])
+ self.assertEqual(args['target'], expected['target'])
if args['ispaired']:
- self.failUnlessEqual(args['target_right'],
+ self.assertEqual(args['target_right'],
expected['target_right'])
if 'mid' in expected:
- self.failUnlessEqual(args['mid'], expected['mid'])
+ self.assertEqual(args['mid'], expected['mid'])
qseq_data = {
- os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_42JUYAAXX_c76_l5_r1.fastq'): {
'istar': True,
'ispaired': True,
'sources': [
u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
},
- os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_42JUYAAXX_c76_l5_r2.fastq'): {
'istar': True,
'ispaired': True,
'sources': [
u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
},
- os.path.join(self.subname, '11154_61MJTAAXX_c76_l6.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_61MJTAAXX_c76_l6.fastq'): {
'istar': True,
'ispaired': False,
'sources': [
}
for args in qseq:
expected = qseq_data[args['target']]
- self.failUnlessEqual(args['istar'], expected['istar'])
- self.failUnlessEqual(args['ispaired'], expected['ispaired'])
+ self.assertEqual(args['istar'], expected['istar'])
+ self.assertEqual(args['ispaired'], expected['ispaired'])
for i in range(len(expected['sources'])):
_, filename = os.path.split(args['sources'][i])
- self.failUnlessEqual(filename, expected['sources'][i])
+ self.assertEqual(filename, expected['sources'][i])
split_test = dict((( x['target'], x) for x in
{'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
u'11154_NoIndex_L003_R2_002.fastq.gz'],
'pyscript': 'desplit_fastq.pyc',
- 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}]
+ 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
+ {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
+ u'12345_CGATGT_L003_R1_002.fastq.gz',
+ u'12345_CGATGT_L003_R1_003.fastq.gz',
+ ],
+ 'pyscript': 'desplit_fastq.pyc',
+ 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
+ {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
+ u'12345_CGATGT_L003_R2_002.fastq.gz',
+ u'12345_CGATGT_L003_R2_003.fastq.gz',
+ ],
+ 'pyscript': 'desplit_fastq.pyc',
+ 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
+ ]
))
for arg in split:
_, target = os.path.split(arg['target'])
pyscript = split_test[target]['pyscript']
- self.failUnless(arg['pyscript'].endswith(pyscript))
+ self.assertTrue(arg['pyscript'].endswith(pyscript))
filename = split_test[target]['target']
- self.failUnless(arg['target'].endswith(filename))
+ self.assertTrue(arg['target'].endswith(filename))
for s_index in range(len(arg['sources'])):
s1 = arg['sources'][s_index]
s2 = split_test[target]['sources'][s_index]
- self.failUnless(s1.endswith(s2))
-
- #print '-------commands---------'
- #pprint (commands)
+ self.assertTrue(s1.endswith(s2))
def test_create_scripts(self):
- os.chdir(self.tempdir)
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- extract.create_scripts(self.result_map)
-
- self.failUnless(os.path.exists('srf.condor'))
+ self.extract.create_scripts(self.result_map)
+
+ self.assertTrue(os.path.exists('srf.condor'))
with open('srf.condor', 'r') as srf:
arguments = [ l for l in srf if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 2)
- self.failUnless('--single sub-11154/11154_30221AAXX_c33_l4.fastq'
+ self.assertEqual(len(arguments), 2)
+ self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
in arguments[0])
- self.failUnless(
- '--right sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
+ self.assertTrue(
+ 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
arguments[1])
- self.failUnless(os.path.exists('qseq.condor'))
+ self.assertTrue(os.path.exists('qseq.condor'))
with open('qseq.condor', 'r') as srf:
arguments = [ l for l in srf if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 3)
- self.failUnless('-o sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
+ self.assertEqual(len(arguments), 3)
+ self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
arguments[0])
- self.failUnless(
+ self.assertTrue(
'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
arguments[1])
- self.failUnless('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
+ self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
arguments[2])
- self.failUnless(os.path.exists('split_fastq.condor'))
+ self.assertTrue(os.path.exists('split_fastq.condor'))
with open('split_fastq.condor', 'r') as split:
arguments = [ l for l in split if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 2)
- self.failUnless('11154_NoIndex_L003_R1_001.fastq.gz' in \
+ self.assertEqual(len(arguments), 4)
+ # Lane 3 Read 1
+ self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
arguments[0])
- self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \
+ # Lane 3 Read 2
+ self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
arguments[1])
+ # Lane 3 Read 1
+ self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
+ self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
+ self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
+ self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
+
+ # Lane 3 Read 2
+ self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
+ self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
+ self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
+ self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
+
def suite():
suite = unittest.makeSuite(TestCondorFastq, 'test')
result_map = results.ResultMap()
result_dir = os.path.join(self.sourcedir,
test_results.S1_NAME)
- result_map.add_result('1000', result_dir)
+ result_map['1000'] = result_dir
submission.link_daf(result_map)
def tearDown(self):
shutil.rmtree(self.tempdir)
-
def test_dict_like(self):
"""Make sure the result map works like an ordered dictionary
"""
results = ResultMap()
- results.add_result('1000', 'dir1000')
- results.add_result('2000', 'dir2000')
- results.add_result('1500', 'dir1500')
+ results['1000'] = 'dir1000'
+ results['2000'] = 'dir2000'
+ results['1500'] = 'dir1500'
self.failUnlessEqual(results.keys(), ['1000', '2000', '1500'])
self.failUnlessEqual(list(results.values()),
self.failUnlessEqual(results['1500'], 'dir1500')
self.failUnlessEqual(results['2000'], 'dir2000')
+ self.assertTrue(u'2000' in results)
+ self.assertTrue('2000' in results)
+ self.assertFalse(u'77777' in results)
+ self.assertFalse('77777' in results)
+
def test_make_from(self):
results = ResultMap()
- results.add_result('1000', S1_NAME)
- results.add_result('2000', S2_NAME)
+ results['1000'] = S1_NAME
+ results['2000'] = S2_NAME
results.make_tree_from(self.sourcedir, self.resultdir)
?file ucscDaf:filename ?filename ;
ucscDaf:md5sum ?md5sum ;
- libraryOntology:has_lane ?lane ;
+ libraryOntology:library ?library ;
a ?file_type .
?file_type a <{{file_class}}> ;
geoSoft:fileTypeLabel ?file_type_label .
select distinct ?name ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate, ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?experiment_type ?library_selection ?library_source
WHERE {
- <{{submission}}> a submissionOntology:submission .
+ <{{submission}}> a submissionOntology:submission ;
+ submissionOntology:library ?library ;
+ submissionOntology:name ?name .
OPTIONAL { <{{submission}}> ucscDaf:control ?control }
OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId }
OPTIONAL { ?library libraryOntology:antibody ?antibody }
OPTIONAL { ?library libraryOntology:cell_line ?cell .
- ?cell_line cells:cell ?cell ;
- cells:documents ?growthProtocol . }
+ OPTIONAL { ?cell_line cells:cell ?cell ;
+ cells:documents ?growthProtocol . }}
OPTIONAL { ?library ucscDaf:sex ?sex }
OPTIONAL { ?library libraryOntology:library_id ?library_id }
OPTIONAL { ?library libraryOntology:replicate ?replicate }
- OPTIONAL { ?library libraryOntology:species ?species_name }
+ OPTIONAL { ?library libraryOntology:species ?species_name .
+ ?species libraryOntology:species ?species_name ;
+ libraryOntology:taxon_id ?taxon_id . }
OPTIONAL { ?library libraryOntology:condition_term ?treatment }
OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type }
OPTIONAL { ?library libraryOntology:librarySelection ?library_selection }
OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
- <{{submission}}> submissionOntology:library ?library ;
- submissionOntology:name ?name .
?species libraryOntology:species ?species_name ;
libraryOntology:taxon_id ?taxon_id .
-{% for name, value in series %}{{name}}={{value}}
-{% endfor %}{% for row in samples %}^SAMPLE={{row.name}}
+{% for name, value in series %}
+{{name}}={{value}}{% endfor %}{% for row in samples %}
+^SAMPLE={{row.name}}
!Sample_type=SRA
!Sample_title={{row.name}}
!Sample_series_id={{ series_id }}
!Sample_extract_protocol={{ row.extractProtocol|safe }}
!Sample_data_processing={{ row.dataProtocol|safe }}
!Sample_molecule_ch1={{ row.extractMolecule }}
-!Sample_characteristics_ch1=labExpId: {{ row.library_id }}
-!Sample_characteristics_ch1=replicate: {{ row.replicate }}
-{% if row.cell %}{% spaceless %}
-!Sample_characteristics_ch1=cell: {{ row.cell }}
-{% endspaceless %}{% endif %}
-{% if row.readType %}{% spaceless %}
-!Sample_characteristics_ch1=readType: {{ row.readType }}
-{% endspaceless %}{% endif %}{% if row.antibody %}{% spaceless %}
-!Sample_characteristics_ch1=cell: {{ row.antibody }}
-{% endspaceless %}{% endif %}{% for run in row.run %}
-!Sample_characteristics_ch1=Illumina image processing pipeline version: {{ run.image_software }}-{{ run.image_version }}
-!Sample_characteristics_ch1=Illumina base-calling pipeline version: {{ run.image_software }}-{{ run.image_version }}{% endfor %}{% for raw in row.raw %}
+!Sample_characteristics_ch1=labExpId: {{ row.library_id }}{% if row.replicate %}
+!Sample_characteristics_ch1=replicate: {{ row.replicate }}{% endif %}{% if row.cell %}
+!Sample_characteristics_ch1=cell: {{ row.cell }}{% endif %}{% if row.readType %}
+!Sample_characteristics_ch1=readType: {{ row.readType }}{% endif %}{% if row.antibody %}
+!Sample_characteristics_ch1=antibody: {{ row.antibody }}{% endif %}{% for run in row.run %}{% if run.image_software %}
+!Sample_characteristics_ch1=Illumina image processing pipeline version: {{ run.image_software }}-{{ run.image_version }}{% endif %}{% if run.basecall_software %}
+!Sample_characteristics_ch1=Illumina base-calling pipeline version: {{ run.basecall_software }}-{{ run.basecall_version }}{% endif %}{% endfor %}{% for raw in row.raw %}
!Sample_raw_file_{{forloop.counter}}={{ raw.filename }}
!Sample_raw_file_type_{{forloop.counter}}={{raw.file_type_label}}
!Sample_raw_file_insert_size_{{forloop.counter}}={{ row.insertLength }}
"""
import collections
from datetime import datetime
+from glob import glob
from urlparse import urlparse, urlunparse
from urllib2 import urlopen
import logging
logger = logging.getLogger(__name__)
-# standard ontology namespaces
-owlNS = RDF.NS('http://www.w3.org/2002/07/owl#')
-dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
-rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
-rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
-xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
-
-# internal ontologies
-submissionOntology = RDF.NS(
- "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
-dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
-libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
-inventoryOntology = RDF.NS(
- "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
-submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
-geoSoftNS = RDF.NS('http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#')
+from htsworkflow.util.rdfns import *
+
+SCHEMAS_URL='http://jumpgate.caltech.edu/phony/schemas'
+INFERENCE_URL='http://jumpgate.caltech.edu/phony/inference'
ISOFORMAT_MS = "%Y-%m-%dT%H:%M:%S.%f"
ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
-
def sparql_query(model, query_filename, output_format='text'):
"""Execute sparql query from file
"""
>>> simplify_uri('http://asdf.org/foo/bar?was=foo')
'was=foo'
"""
+ if isinstance(uri, RDF.Node):
+ if uri.is_resource():
+ uri = uri.uri
+ else:
+ raise ValueError("Can't simplify an RDF literal")
+ if isinstance(uri, RDF.Uri):
+ uri = str(uri)
+
parsed = urlparse(uri)
if len(parsed.query) > 0:
return parsed.query
return element
raise ValueError("Unable to simplify %s" % (uri,))
-def simplifyUri(namespace, term):
+def stripNamespace(namespace, term):
"""Remove the namespace portion of a term
returns None if they aren't in common
directory = os.getcwd()
if model_name is None:
- storage = RDF.MemoryStorage()
+ storage = RDF.MemoryStorage(options_string="contexts='yes'")
logger.info("Using RDF Memory model")
else:
- options = "hash-type='bdb',dir='{0}'".format(directory)
+ options = "contexts='yes',hash-type='bdb',dir='{0}'".format(directory)
storage = RDF.HashStorage(model_name,
options=options)
logger.info("Using {0} with options {1}".format(model_name, options))
def load_into_model(model, parser_name, path, ns=None):
+ if type(ns) in types.StringTypes:
+ ns = RDF.Uri(ns)
+
+ if isinstance(path, RDF.Node):
+ if path.is_resource():
+ path = str(path.uri)
+ else:
+ raise ValueError("url to load can't be a RDF literal")
+
url_parts = list(urlparse(path))
- if len(url_parts[0]) == 0:
+ if len(url_parts[0]) == 0 or url_parts[0] == 'file':
url_parts[0] = 'file'
url_parts[2] = os.path.abspath(url_parts[2])
+ if parser_name is None or parser_name == 'guess':
+ parser_name = guess_parser_by_extension(path)
url = urlunparse(url_parts)
- logger.info("Opening %s" % (url,))
- req = urlopen(url)
- logger.debug("request status: %s" % (req.code,))
- if parser_name is None:
- content_type = req.headers.get('Content-Type', None)
- parser_name = guess_parser(content_type, path)
- logger.debug("Guessed parser: %s" % (parser_name,))
- data = req.read()
- load_string_into_model(model, parser_name, data, ns)
+ logger.info("Opening {0} with parser {1}".format(url, parser_name))
+
+ rdf_parser = RDF.Parser(name=parser_name)
+ statements = []
+ retries = 3
+ while retries > 0:
+ try:
+ retries -= 1
+ statements = rdf_parser.parse_as_stream(url, ns)
+ retries = 0
+ except RDF.RedlandError, e:
+ errmsg = "RDF.RedlandError: {0} {1} tries remaining"
+ logger.error(errmsg.format(str(e), retries))
+
+ for s in statements:
+ conditionally_add_statement(model, s, ns)
def load_string_into_model(model, parser_name, data, ns=None):
+ ns = fixup_namespace(ns)
+ logger.debug("load_string_into_model parser={0}, len={1}".format(
+ parser_name, len(data)))
+ rdf_parser = RDF.Parser(name=parser_name)
+
+ for s in rdf_parser.parse_string_as_stream(data, ns):
+ conditionally_add_statement(model, s, ns)
+
+
+def fixup_namespace(ns):
if ns is None:
ns = RDF.Uri("http://localhost/")
+ elif type(ns) in types.StringTypes:
+ ns = RDF.Uri(ns)
+ elif not(isinstance(ns, RDF.Uri)):
+ errmsg = "Namespace should be string or uri not {0}"
+ raise ValueError(errmsg.format(str(type(ns))))
+ return ns
+
+
+def conditionally_add_statement(model, s, ns):
imports = owlNS['imports']
- rdf_parser = RDF.Parser(name=parser_name)
- for s in rdf_parser.parse_string_as_stream(data, ns):
- if s.predicate == imports:
- obj = str(s.object)
- logger.info("Importing %s" % (obj,))
- load_into_model(model, None, obj, ns)
- if s.object.is_literal():
- value_type = get_node_type(s.object)
- if value_type == 'string':
- s.object = sanitize_literal(s.object)
- model.add_statement(s)
+ if s.predicate == imports:
+ obj = str(s.object)
+ logger.info("Importing %s" % (obj,))
+ load_into_model(model, None, obj, ns)
+ if s.object.is_literal():
+ value_type = get_node_type(s.object)
+ if value_type == 'string':
+ s.object = sanitize_literal(s.object)
+ model.add_statement(s)
+
+
+def add_default_schemas(model, schema_path=None):
+ """Add default schemas to a model
+ Looks for turtle files in either htsworkflow/util/schemas
+ or in the list of directories provided in schema_path
+ """
+
+ if schema_path is None:
+ path, _ = os.path.split(__file__)
+ schema_path = [os.path.join(path, 'schemas')]
+ elif type(schema_path) in types.StringTypes:
+ schema_path = [schema_path]
+
+ for p in schema_path:
+ for f in glob(os.path.join(p, '*.turtle')):
+ add_schema(model, f)
+
+def add_schema(model, filename):
+ """Add a schema to a model.
+
+    The main difference from 'load_into_model' is that the statements
+    are tagged with a context node so they can be removed later.
+ """
+ parser = RDF.Parser(name='turtle')
+ context = RDF.Node(RDF.Uri(SCHEMAS_URL))
+ url = 'file://' + filename
+ for s in parser.parse_as_stream(url):
+ try:
+ model.append(s, context)
+ except RDF.RedlandError as e:
+ logger.error("%s with %s", str(e), str(s))
+
+
+def remove_schemas(model):
+ """Remove statements labeled with our schema context"""
+ context = RDF.Node(RDF.Uri(SCHEMAS_URL))
+ model.context_remove_statements(context)
def sanitize_literal(node):
def guess_parser(content_type, pathname):
- if content_type in ('application/rdf+xml'):
+ if content_type in ('application/rdf+xml',):
return 'rdfxml'
- elif content_type in ('application/x-turtle'):
+ elif content_type in ('application/x-turtle',):
return 'turtle'
- elif content_type in ('text/html'):
+ elif content_type in ('text/html',):
+ return 'rdfa'
+ elif content_type is None or content_type in ('text/plain',):
+ return guess_parser_by_extension(pathname)
+
+def guess_parser_by_extension(pathname):
+ _, ext = os.path.splitext(pathname)
+ if ext in ('.xml', '.rdf'):
+ return 'rdfxml'
+ elif ext in ('.html',):
return 'rdfa'
- elif content_type is None:
- _, ext = os.path.splitext(pathname)
- if ext in ('xml', 'rdf'):
- return 'rdfxml'
- elif ext in ('html'):
- return 'rdfa'
- elif ext in ('turtle'):
- return 'turtle'
+ elif ext in ('.turtle',):
+ return 'turtle'
return 'guess'
def get_serializer(name='turtle'):
"""
writer = RDF.Serializer(name=name)
# really standard stuff
- writer.set_namespace('owl', owlNS._prefix)
writer.set_namespace('rdf', rdfNS._prefix)
writer.set_namespace('rdfs', rdfsNS._prefix)
+ writer.set_namespace('owl', owlNS._prefix)
+ writer.set_namespace('dc', dcNS._prefix)
+ writer.set_namespace('xml', xmlNS._prefix)
writer.set_namespace('xsd', xsdNS._prefix)
+ writer.set_namespace('vs', vsNS._prefix)
+ writer.set_namespace('wot', wotNS._prefix)
# should these be here, kind of specific to an application
writer.set_namespace('libraryOntology', libraryOntology._prefix)
return writer
-def dump_model(model):
+def dump_model(model, destination=None):
+ if destination is None:
+ destination = sys.stdout
serializer = get_serializer()
- print serializer.serialize_model_to_string(model)
+ destination.write(serializer.serialize_model_to_string(model))
+ destination.write(os.linesep)
--- /dev/null
+import logging
+import os
+import sys
+
+import RDF
+
+from htsworkflow.util.rdfns import *
+from htsworkflow.util.rdfhelp import SCHEMAS_URL
+
+INFER_URL='http://jumpgate.caltech.edu/phony/infer'
+
+class Infer(object):
+ """Provide some simple inference.
+
+ Provides a few default rules as methods starting with _rule_
+ """
+ def __init__(self, model):
+ self.model = model
+ self._context = RDF.Node(RDF.Uri(INFER_URL))
+
+
+ def think(self, max_iterations=None):
+        """Update model with inferred statements.
+
+ max_iterations puts a limit on the number of times we
+ run through the loop.
+
+        It will also exit early if an iteration adds no new statements.
+
+ Also this is the naive solution.
+        There are probably better approaches out there.
+ """
+ iterations = 0
+ while max_iterations is None or iterations != max_iterations:
+ starting_size = self.model.size()
+
+ for method_name in dir(self):
+ if method_name.startswith('_rule_'):
+ method = getattr(self, method_name)
+ method()
+ if self.model.size() == starting_size:
+ # we didn't add anything new
+ return
+
+ def validate(self, destination=None):
+ if destination is None:
+ destination = sys.stdout
+
+ for msg in self.run_validation():
+ destination.write(msg)
+ destination.write(os.linesep)
+
+ def run_validation(self):
+ """Apply validation rules to our model.
+ """
+ for method_name in dir(self):
+ if method_name.startswith('_validate_'):
+ method = getattr(self, method_name)
+ for msg in method():
+ yield msg
+
+
+ def _rule_class(self):
+        """Resolve class chains.
+        e.g. if a is a BClass, and BClass is an AClass,
+        then a is both a BClass and an AClass.
+ """
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?obj ?class
+ where {
+ ?alias a ?class .
+ ?obj a ?alias .
+ }"""
+ query = RDF.SPARQLQuery(body)
+ for r in query.execute(self.model):
+ s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
+ if s not in self.model:
+ self.model.append(s, self._context)
+
+ def _rule_subclass(self):
+        """An instance of a subclass is also an instance of its parent class
+ """
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?obj ?subclass ?parent
+ where {
+ ?subclass rdfs:subClassOf ?parent .
+ ?obj a ?subclass .
+ }"""
+ query = RDF.SPARQLQuery(body)
+ for r in query.execute(self.model):
+ s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
+ if s not in self.model:
+ self.model.append(s, self._context)
+
+ def _rule_inverse_of(self):
+ """Add statements computed with inverseOf
+ """
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?o ?reverse ?s
+ where {
+ ?s ?term ?o .
+ ?s a ?subject_type .
+ ?o a ?object_type .
+ ?term owl:inverseOf ?reverse .
+ ?term rdfs:domain ?subject_type ;
+ rdfs:range ?object_type .
+ ?reverse rdfs:domain ?object_type ;
+ rdfs:range ?subject_type .
+ }"""
+ query = RDF.SPARQLQuery(body)
+
+ statements = []
+ for r in query.execute(self.model):
+ s = RDF.Statement(r['o'], r['reverse'], r['s'])
+ if s not in self.model:
+ self.model.append(s, self._context)
+
+
+ def _validate_types(self):
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?subject ?predicate ?object
+ where {
+ ?subject ?predicate ?object
+ OPTIONAL { ?subject a ?class }
+ FILTER(!bound(?class))
+ }
+ """
+ query = RDF.SPARQLQuery(body)
+ errmsg = "Missing type for: {0}"
+ for r in query.execute(self.model):
+ yield errmsg.format(str(r['subject']))
+
+ def _validate_undefined_properties(self):
+ """Find properties that aren't defined.
+ """
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?subject ?predicate ?object
+ where {
+ ?subject ?predicate ?object
+ OPTIONAL { ?predicate a ?predicate_class }
+ FILTER(!bound(?predicate_class))
+ }"""
+ query = RDF.SPARQLQuery(body)
+ msg = "Undefined property in {0} {1} {2}"
+ for r in query.execute(self.model):
+ yield msg.format(str(r['subject']),
+ str(r['predicate']),
+ str(r['object']))
+
+ def _validate_property_types(self):
+        """Check that subjects and objects conform to their predicate's declared rdfs:domain and rdfs:range
+ """
+ property_template = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+ select ?type
+ where {{
+ <{predicate}> a rdf:Property ;
+ {space} ?type .
+ }}"""
+
+ wrong_domain_type = "Domain of {0} was not {1}"
+ wrong_range_type = "Range of {0} was not {1}"
+
+ count = 0
+ schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
+ for s, context in self.model.as_stream_context():
+ if context == schema:
+ continue
+ # check domain
+ query = RDF.SPARQLQuery(property_template.format(
+ predicate=s.predicate,
+ space='rdfs:domain'))
+ for r in query.execute(self.model):
+ if r['type'] == rdfsNS['Resource']:
+ continue
+ check = RDF.Statement(s.subject, rdfNS['type'], r['type'])
+ if not self.model.contains_statement(check):
+ yield wrong_domain_type.format(str(s),
+ str(r['type']))
+ # check range
+ query = RDF.SPARQLQuery(property_template.format(
+ predicate=s.predicate,
+ space='rdfs:range'))
+ for r in query.execute(self.model):
+ if r['type'] == rdfsNS['Resource']:
+ continue
+ check = RDF.Statement(s.object, rdfNS['type'], r['type'])
+ if not self.model.contains_statement(check):
+ yield wrong_range_type.format(str(s),
+ str(r['type']))
+
+ return
+
--- /dev/null
+"""Namespace definitions
+
+All in one place to make 'from rdfns import *' work safely
+"""
+from RDF import NS
+
+# standard ontology namespaces
+rdfNS = NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+rdfsNS = NS("http://www.w3.org/2000/01/rdf-schema#")
+owlNS = NS('http://www.w3.org/2002/07/owl#')
+dcNS = NS("http://purl.org/dc/elements/1.1/")
+xmlNS = NS('http://www.w3.org/XML/1998/namespace')
+xsdNS = NS("http://www.w3.org/2001/XMLSchema#")
+vsNS = NS('http://www.w3.org/2003/06/sw-vocab-status/ns#')
+wotNS = NS('http://xmlns.com/wot/0.1/')
+
+# internal ontologies
+submissionOntology = NS(
+ "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
+dafTermOntology = NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
+libraryOntology = NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
+inventoryOntology = NS(
+ "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
+submissionLog = NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
+geoSoftNS = NS('http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#')
--- /dev/null
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix grddl: <http://www.w3.org/2003/g/data-view#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix vs: <http://www.w3.org/2003/06/sw-vocab-status/ns#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix wot: <http://xmlns.com/wot/0.1/> .
+
+# This is just a subset of Dublin Core
+<http://purl.org/dc/elements/1.1/>
+ dc:title "DCMI Metadata Terms" ;
+ rdfs:comment "Metadata terms maintained by the Dublin Core Metadata Initiative" ;
+ a owl:Ontology ;
+ rdfs:seeAlso <http://dublincore.org/documents/dcmi-terms/> .
+
+dc:title
+ a rdf:Property ;
+ rdfs:comment "A name given to the resource"@en ;
+ rdfs:range rdfs:Literal .
+
+dc:description
+ a rdf:Property ;
+ rdfs:label "Description"@en ;
+ rdfs:comment "An account of the resource"@en ;
+ rdfs:range rdfs:Literal .
--- /dev/null
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix grddl: <http://www.w3.org/2003/g/data-view#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix vs: <http://www.w3.org/2003/06/sw-vocab-status/ns#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix wot: <http://xmlns.com/wot/0.1/> .
+
+
+<http://www.w3.org/2002/07/owl> a owl:Ontology ;
+ dc:title "The OWL 2 Schema vocabulary (OWL 2)" ;
+ rdfs:comment """
+ This ontology partially describes the built-in classes and
+ properties that together form the basis of the RDF/XML syntax of OWL 2.
+ The content of this ontology is based on Tables 6.1 and 6.2
+ in Section 6.4 of the OWL 2 RDF-Based Semantics specification,
+ available at http://www.w3.org/TR/owl2-rdf-based-semantics/.
+ Please note that those tables do not include the different annotations
+ (labels, comments and rdfs:isDefinedBy links) used in this file.
+ Also note that the descriptions provided in this ontology do not
+ provide a complete and correct formal description of either the syntax
+ or the semantics of the introduced terms (please see the OWL 2
+ recommendations for the complete and normative specifications).
+ Furthermore, the information provided by this ontology may be
+ misleading if not used with care. This ontology SHOULD NOT be imported
+ into OWL ontologies. Importing this file into an OWL 2 DL ontology
+ will cause it to become an OWL 2 Full ontology and may have other,
+ unexpected, consequences.
+ """ ;
+ rdfs:isDefinedBy
+ <http://www.w3.org/TR/owl2-mapping-to-rdf/>,
+ <http://www.w3.org/TR/owl2-rdf-based-semantics/>,
+ <http://www.w3.org/TR/owl2-syntax/> ;
+ rdfs:seeAlso <http://www.w3.org/TR/owl2-rdf-based-semantics/#table-axiomatic-classes>,
+ <http://www.w3.org/TR/owl2-rdf-based-semantics/#table-axiomatic-properties> ;
+ owl:imports <http://www.w3.org/2000/01/rdf-schema> ;
+ owl:versionIRI <http://www.w3.org/2002/07/owl> ;
+ owl:versionInfo "$Date: 2009/11/15 10:54:12 $" ;
+ # grddl:namespaceTransformation <http://dev.w3.org/cvsweb/2009/owl-grddl/owx2rdf.xsl>
+ .
+
+
+owl:AllDifferent a rdfs:Class ;
+ rdfs:label "AllDifferent" ;
+ rdfs:comment "The class of collections of pairwise different individuals." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:AllDisjointClasses a rdfs:Class ;
+ rdfs:label "AllDisjointClasses" ;
+ rdfs:comment "The class of collections of pairwise disjoint classes." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:AllDisjointProperties a rdfs:Class ;
+ rdfs:label "AllDisjointProperties" ;
+ rdfs:comment "The class of collections of pairwise disjoint properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:Annotation a rdfs:Class ;
+ rdfs:label "Annotation" ;
+ rdfs:comment "The class of annotated annotations for which the RDF serialization consists of an annotated subject, predicate and object." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:AnnotationProperty a rdfs:Class ;
+ rdfs:label "AnnotationProperty" ;
+ rdfs:comment "The class of annotation properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:AsymmetricProperty a rdfs:Class ;
+ rdfs:label "AsymmetricProperty" ;
+ rdfs:comment "The class of asymmetric properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:Axiom a rdfs:Class ;
+ rdfs:label "Axiom" ;
+ rdfs:comment "The class of annotated axioms for which the RDF serialization consists of an annotated subject, predicate and object." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:Class a rdfs:Class ;
+ rdfs:label "Class" ;
+ rdfs:comment "The class of OWL classes." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Class .
+
+owl:DataRange a rdfs:Class ;
+ rdfs:label "DataRange" ;
+ rdfs:comment "The class of OWL data ranges, which are special kinds of datatypes. Note: The use of the IRI owl:DataRange has been deprecated as of OWL 2. The IRI rdfs:Datatype SHOULD be used instead." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Datatype .
+
+owl:DatatypeProperty a rdfs:Class ;
+ rdfs:label "DatatypeProperty" ;
+ rdfs:comment "The class of data properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:DeprecatedClass a rdfs:Class ;
+ rdfs:label "DeprecatedClass" ;
+ rdfs:comment "The class of deprecated classes." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Class .
+
+owl:DeprecatedProperty a rdfs:Class ;
+ rdfs:label "DeprecatedProperty" ;
+ rdfs:comment "The class of deprecated properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:FunctionalProperty a rdfs:Class ;
+ rdfs:label "FunctionalProperty" ;
+ rdfs:comment "The class of functional properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:InverseFunctionalProperty a rdfs:Class ;
+ rdfs:label "InverseFunctionalProperty" ;
+ rdfs:comment "The class of inverse-functional properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:IrreflexiveProperty a rdfs:Class ;
+ rdfs:label "IrreflexiveProperty" ;
+ rdfs:comment "The class of irreflexive properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:NamedIndividual a rdfs:Class ;
+ rdfs:label "NamedIndividual" ;
+ rdfs:comment "The class of named individuals." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:Thing .
+
+owl:NegativePropertyAssertion a rdfs:Class ;
+ rdfs:label "NegativePropertyAssertion" ;
+ rdfs:comment "The class of negative property assertions." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:Nothing a owl:Class ;
+ rdfs:label "Nothing" ;
+ rdfs:comment "This is the empty class." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:Thing .
+
+owl:ObjectProperty a rdfs:Class ;
+ rdfs:label "ObjectProperty" ;
+ rdfs:comment "The class of object properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:Ontology a rdfs:Class ;
+ rdfs:label "Ontology" ;
+ rdfs:comment "The class of ontologies." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdfs:Resource .
+
+owl:OntologyProperty a rdfs:Class ;
+ rdfs:label "OntologyProperty" ;
+ rdfs:comment "The class of ontology properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf rdf:Property .
+
+owl:ReflexiveProperty a rdfs:Class ;
+ rdfs:label "ReflexiveProperty" ;
+ rdfs:comment "The class of reflexive properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:Restriction a rdfs:Class ;
+ rdfs:label "Restriction" ;
+ rdfs:comment "The class of property restrictions." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:Class .
+
+owl:SymmetricProperty a rdfs:Class ;
+ rdfs:label "SymmetricProperty" ;
+ rdfs:comment "The class of symmetric properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:TransitiveProperty a rdfs:Class ;
+ rdfs:label "TransitiveProperty" ;
+ rdfs:comment "The class of transitive properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:subClassOf owl:ObjectProperty .
+
+owl:Thing a owl:Class , rdfs:Class ;
+ rdfs:label "Thing" ;
+ rdfs:comment "The class of OWL individuals." ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> .
+
+owl:allValuesFrom a rdf:Property ;
+ rdfs:label "allValuesFrom" ;
+ rdfs:comment "The property that determines the class that a universal property restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Class .
+
+owl:annotatedProperty a rdf:Property ;
+ rdfs:label "annotatedProperty" ;
+ rdfs:comment "The property that determines the predicate of an annotated axiom or annotated annotation." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:annotatedSource a rdf:Property ;
+ rdfs:label "annotatedSource" ;
+ rdfs:comment "The property that determines the subject of an annotated axiom or annotated annotation." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:annotatedTarget a rdf:Property ;
+ rdfs:label "annotatedTarget" ;
+ rdfs:comment "The property that determines the object of an annotated axiom or annotated annotation." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:assertionProperty a rdf:Property ;
+ rdfs:label "assertionProperty" ;
+ rdfs:comment "The property that determines the predicate of a negative property assertion." ;
+ rdfs:domain owl:NegativePropertyAssertion ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:Property .
+
+owl:backwardCompatibleWith a owl:AnnotationProperty, owl:OntologyProperty ;
+ rdfs:label "backwardCompatibleWith" ;
+ rdfs:comment "The annotation property that indicates that a given ontology is backward compatible with another ontology." ;
+ rdfs:domain owl:Ontology ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Ontology .
+
+owl:bottomDataProperty a owl:DatatypeProperty ;
+ rdfs:label "bottomDataProperty" ;
+ rdfs:comment "The data property that does not relate any individual to any data value." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Literal .
+
+owl:bottomObjectProperty a owl:ObjectProperty ;
+ rdfs:label "bottomObjectProperty" ;
+ rdfs:comment "The object property that does not relate any two individuals." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:cardinality a rdf:Property ;
+ rdfs:label "cardinality" ;
+ rdfs:comment "The property that determines the cardinality of an exact cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:complementOf a rdf:Property ;
+ rdfs:label "complementOf" ;
+ rdfs:comment "The property that determines that a given class is the complement of another class." ;
+ rdfs:domain owl:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Class .
+
+owl:datatypeComplementOf a rdf:Property ;
+ rdfs:label "datatypeComplementOf" ;
+ rdfs:comment "The property that determines that a given data range is the complement of another data range with respect to the data domain." ;
+ rdfs:domain rdfs:Datatype ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Datatype .
+
+owl:deprecated a owl:AnnotationProperty ;
+ rdfs:label "deprecated" ;
+ rdfs:comment "The annotation property that indicates that a given entity has been deprecated." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:differentFrom a rdf:Property ;
+ rdfs:label "differentFrom" ;
+ rdfs:comment "The property that determines that two given individuals are different." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:disjointUnionOf a rdf:Property ;
+ rdfs:label "disjointUnionOf" ;
+ rdfs:comment "The property that determines that a given class is equivalent to the disjoint union of a collection of other classes." ;
+ rdfs:domain owl:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:disjointWith a rdf:Property ;
+ rdfs:label "disjointWith" ;
+ rdfs:comment "The property that determines that two given classes are disjoint." ;
+ rdfs:domain owl:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Class .
+
+owl:distinctMembers a rdf:Property ;
+ rdfs:label "distinctMembers" ;
+ rdfs:comment "The property that determines the collection of pairwise different individuals in a owl:AllDifferent axiom." ;
+ rdfs:domain owl:AllDifferent ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:equivalentClass a rdf:Property ;
+ rdfs:label "equivalentClass" ;
+ rdfs:comment "The property that determines that two given classes are equivalent, and that is used to specify datatype definitions." ;
+ rdfs:domain rdfs:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Class .
+
+owl:equivalentProperty a rdf:Property ;
+ rdfs:label "equivalentProperty" ;
+ rdfs:comment "The property that determines that two given properties are equivalent." ;
+ rdfs:domain rdf:Property ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:Property .
+
+owl:hasKey a rdf:Property ;
+ rdfs:label "hasKey" ;
+ rdfs:comment "The property that determines the collection of properties that jointly build a key." ;
+ rdfs:domain owl:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:hasSelf a rdf:Property ;
+ rdfs:label "hasSelf" ;
+ rdfs:comment "The property that determines the property that a self restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:hasValue a rdf:Property ;
+ rdfs:label "hasValue" ;
+ rdfs:comment "The property that determines the individual that a has-value restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:imports a owl:OntologyProperty ;
+ rdfs:label "imports" ;
+ rdfs:comment "The property that is used for importing other ontologies into a given ontology." ;
+ rdfs:domain owl:Ontology ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Ontology .
+
+owl:incompatibleWith a owl:AnnotationProperty, owl:OntologyProperty ;
+ rdfs:label "incompatibleWith" ;
+ rdfs:comment "The annotation property that indicates that a given ontology is incompatible with another ontology." ;
+ rdfs:domain owl:Ontology ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Ontology .
+
+owl:intersectionOf a rdf:Property ;
+ rdfs:label "intersectionOf" ;
+ rdfs:comment "The property that determines the collection of classes or data ranges that build an intersection." ;
+ rdfs:domain rdfs:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:inverseOf a rdf:Property ;
+ rdfs:label "inverseOf" ;
+ rdfs:comment "The property that determines that two given properties are inverse." ;
+ rdfs:domain owl:ObjectProperty ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:ObjectProperty .
+
+owl:maxCardinality a rdf:Property ;
+ rdfs:label "maxCardinality" ;
+ rdfs:comment "The property that determines the cardinality of a maximum cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:maxQualifiedCardinality a rdf:Property ;
+ rdfs:label "maxQualifiedCardinality" ;
+ rdfs:comment "The property that determines the cardinality of a maximum qualified cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:members a rdf:Property ;
+ rdfs:label "members" ;
+ rdfs:comment "The property that determines the collection of members in either a owl:AllDifferent, owl:AllDisjointClasses or owl:AllDisjointProperties axiom." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:minCardinality a rdf:Property ;
+ rdfs:label "minCardinality" ;
+ rdfs:comment "The property that determines the cardinality of a minimum cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:minQualifiedCardinality a rdf:Property ;
+ rdfs:label "minQualifiedCardinality" ;
+ rdfs:comment "The property that determines the cardinality of a minimum qualified cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:onClass a rdf:Property ;
+ rdfs:label "onClass" ;
+ rdfs:comment "The property that determines the class that a qualified object cardinality restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Class .
+
+owl:onDataRange a rdf:Property ;
+ rdfs:label "onDataRange" ;
+ rdfs:comment "The property that determines the data range that a qualified data cardinality restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Datatype .
+
+owl:onDatatype a rdf:Property ;
+ rdfs:label "onDatatype" ;
+ rdfs:comment "The property that determines the datatype that a datatype restriction refers to." ;
+ rdfs:domain rdfs:Datatype ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Datatype .
+
+owl:oneOf a rdf:Property ;
+ rdfs:label "oneOf" ;
+ rdfs:comment "The property that determines the collection of individuals or data values that build an enumeration." ;
+ rdfs:domain rdfs:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:onProperties a rdf:Property ;
+ rdfs:label "onProperties" ;
+ rdfs:comment "The property that determines the n-tuple of properties that a property restriction on an n-ary data range refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:onProperty a rdf:Property ;
+ rdfs:label "onProperty" ;
+ rdfs:comment "The property that determines the property that a property restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:Property .
+
+owl:priorVersion a owl:AnnotationProperty, owl:OntologyProperty ;
+ rdfs:label "priorVersion" ;
+ rdfs:comment "The annotation property that indicates the predecessor ontology of a given ontology." ;
+ rdfs:domain owl:Ontology ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Ontology .
+
+owl:propertyChainAxiom a rdf:Property ;
+ rdfs:label "propertyChainAxiom" ;
+ rdfs:comment "The property that determines the n-tuple of properties that build a sub property chain of a given property." ;
+ rdfs:domain owl:ObjectProperty ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:propertyDisjointWith a rdf:Property ;
+ rdfs:label "propertyDisjointWith" ;
+ rdfs:comment "The property that determines that two given properties are disjoint." ;
+ rdfs:domain rdf:Property ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:Property .
+
+owl:qualifiedCardinality a rdf:Property ;
+ rdfs:label "qualifiedCardinality" ;
+ rdfs:comment "The property that determines the cardinality of an exact qualified cardinality restriction." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range xsd:nonNegativeInteger .
+
+owl:sameAs a rdf:Property ;
+ rdfs:label "sameAs" ;
+ rdfs:comment "The property that determines that two given individuals are equal." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:someValuesFrom a rdf:Property ;
+ rdfs:label "someValuesFrom" ;
+ rdfs:comment "The property that determines the class that an existential property restriction refers to." ;
+ rdfs:domain owl:Restriction ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Class .
+
+owl:sourceIndividual a rdf:Property ;
+ rdfs:label "sourceIndividual" ;
+ rdfs:comment "The property that determines the subject of a negative property assertion." ;
+ rdfs:domain owl:NegativePropertyAssertion ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:targetIndividual a rdf:Property ;
+ rdfs:label "targetIndividual" ;
+ rdfs:comment "The property that determines the object of a negative object property assertion." ;
+ rdfs:domain owl:NegativePropertyAssertion ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:targetValue a rdf:Property ;
+ rdfs:label "targetValue" ;
+ rdfs:comment "The property that determines the value of a negative data property assertion." ;
+ rdfs:domain owl:NegativePropertyAssertion ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Literal .
+
+owl:topDataProperty a owl:DatatypeProperty ;
+ rdfs:label "topDataProperty" ;
+ rdfs:comment "The data property that relates every individual to every data value." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Literal .
+
+owl:topObjectProperty a owl:ObjectProperty ;
+ rdfs:label "topObjectProperty" ;
+ rdfs:comment "The object property that relates every two individuals." ;
+ rdfs:domain owl:Thing ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Thing .
+
+owl:unionOf a rdf:Property ;
+ rdfs:label "unionOf" ;
+ rdfs:comment "The property that determines the collection of classes or data ranges that build a union." ;
+ rdfs:domain rdfs:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
+
+owl:versionInfo a owl:AnnotationProperty ;
+ rdfs:label "versionInfo" ;
+ rdfs:comment "The annotation property that provides version information for an ontology or another OWL construct." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdfs:Resource .
+
+owl:versionIRI a owl:OntologyProperty ;
+ rdfs:label "versionIRI" ;
+ rdfs:comment "The property that identifies the version IRI of an ontology." ;
+ rdfs:domain owl:Ontology ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range owl:Ontology .
+
+owl:withRestrictions a rdf:Property ;
+ rdfs:label "withRestrictions" ;
+ rdfs:comment "The property that determines the collection of facet-value pairs that define a datatype restriction." ;
+ rdfs:domain rdfs:Datatype ;
+ rdfs:isDefinedBy <http://www.w3.org/2002/07/owl#> ;
+ rdfs:range rdf:List .
--- /dev/null
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix grddl: <http://www.w3.org/2003/g/data-view#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix vs: <http://www.w3.org/2003/06/sw-vocab-status/ns#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix wot: <http://xmlns.com/wot/0.1/> .
+
+<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ dc:description "This is the RDF Schema for the RDF vocabulary defined in the RDF namespace."@en ;
+ dc:title "The RDF Vocabulary (RDF)"@en ;
+ a owl:Ontology ;
+ rdfs:seeAlso <http://www.w3.org/2000/01/rdf-schema-more> .
+
+rdf:Alt
+ a rdfs:Class ;
+ rdfs:comment "The class of containers of alternatives." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Alt" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:Bag
+ a rdfs:Class ;
+ rdfs:comment "The class of unordered containers." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Bag" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:List
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF Lists." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "List" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:PlainLiteral
+ a rdfs:Datatype ;
+ rdfs:comment "The class of plain (i.e. untyped) literal values." ;
+ rdfs:isDefinedBy <http://www.w3.org/TR/rdf-plain-literal/> ;
+ rdfs:label "PlainLiteral" ;
+ rdfs:subClassOf rdfs:Literal .
+
+rdf:Property
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Property" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:Seq
+ a rdfs:Class ;
+ rdfs:comment "The class of ordered containers." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Seq" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:Statement
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF statements." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Statement" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:XMLLiteral
+ a rdfs:Datatype ;
+ rdfs:comment "The class of XML literal values." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "XMLLiteral" ;
+ rdfs:subClassOf rdfs:Literal .
+
+rdf:first
+ a rdf:Property ;
+ rdfs:comment "The first item in the subject RDF list." ;
+ rdfs:domain rdf:List ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "first" ;
+ rdfs:range rdfs:Resource .
+
+rdf:nil
+ a rdf:List ;
+ rdfs:comment "The empty list, with no items in it. If the rest of a list is nil then the list has no more items in it." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "nil" .
+
+rdf:object
+ a rdf:Property ;
+ rdfs:comment "The object of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "object" ;
+ rdfs:range rdfs:Resource .
+
+rdf:predicate
+ a rdf:Property ;
+ rdfs:comment "The predicate of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "predicate" ;
+ rdfs:range rdfs:Resource .
+
+rdf:rest
+ a rdf:Property ;
+ rdfs:comment "The rest of the subject RDF list after the first item." ;
+ rdfs:domain rdf:List ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "rest" ;
+ rdfs:range rdf:List .
+
+rdf:subject
+ a rdf:Property ;
+ rdfs:comment "The subject of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "subject" ;
+ rdfs:range rdfs:Resource .
+
+rdf:type
+ a rdf:Property ;
+ rdfs:comment "The subject is an instance of a class." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "type" ;
+ rdfs:range rdfs:Class .
+
+rdf:value
+ a rdf:Property ;
+ rdfs:comment "Idiomatic property used for structured values." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "value" ;
+ rdfs:range rdfs:Resource .
+
+# NOTE(review): everything from here to the end of this file restates the RDF
+# vocabulary block above verbatim; duplicate triples are idempotent when loaded
+# into an RDF model, but the repetition looks like an accidental double paste.
+<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ dc:description "This is the RDF Schema for the RDF vocabulary defined in the RDF namespace."@en ;
+ dc:title "The RDF Vocabulary (RDF)"@en ;
+ a owl:Ontology ;
+ rdfs:seeAlso <http://www.w3.org/2000/01/rdf-schema-more> .
+
+rdf:Alt
+ a rdfs:Class ;
+ rdfs:comment "The class of containers of alternatives." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Alt" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:Bag
+ a rdfs:Class ;
+ rdfs:comment "The class of unordered containers." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Bag" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:List
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF Lists." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "List" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:PlainLiteral
+ a rdfs:Datatype ;
+ rdfs:comment "The class of plain (i.e. untyped) literal values." ;
+ rdfs:isDefinedBy <http://www.w3.org/TR/rdf-plain-literal/> ;
+ rdfs:label "PlainLiteral" ;
+ rdfs:subClassOf rdfs:Literal .
+
+rdf:Property
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF properties." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Property" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:Seq
+ a rdfs:Class ;
+ rdfs:comment "The class of ordered containers." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Seq" ;
+ rdfs:subClassOf rdfs:Container .
+
+rdf:Statement
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF statements." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "Statement" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdf:XMLLiteral
+ a rdfs:Datatype ;
+ rdfs:comment "The class of XML literal values." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "XMLLiteral" ;
+ rdfs:subClassOf rdfs:Literal .
+
+rdf:first
+ a rdf:Property ;
+ rdfs:comment "The first item in the subject RDF list." ;
+ rdfs:domain rdf:List ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "first" ;
+ rdfs:range rdfs:Resource .
+
+rdf:nil
+ a rdf:List ;
+ rdfs:comment "The empty list, with no items in it. If the rest of a list is nil then the list has no more items in it." ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "nil" .
+
+rdf:object
+ a rdf:Property ;
+ rdfs:comment "The object of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "object" ;
+ rdfs:range rdfs:Resource .
+
+rdf:predicate
+ a rdf:Property ;
+ rdfs:comment "The predicate of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "predicate" ;
+ rdfs:range rdfs:Resource .
+
+rdf:rest
+ a rdf:Property ;
+ rdfs:comment "The rest of the subject RDF list after the first item." ;
+ rdfs:domain rdf:List ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "rest" ;
+ rdfs:range rdf:List .
+
+rdf:subject
+ a rdf:Property ;
+ rdfs:comment "The subject of the subject RDF statement." ;
+ rdfs:domain rdf:Statement ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "subject" ;
+ rdfs:range rdfs:Resource .
+
+rdf:type
+ a rdf:Property ;
+ rdfs:comment "The subject is an instance of a class." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "type" ;
+ rdfs:range rdfs:Class .
+
+rdf:value
+ a rdf:Property ;
+ rdfs:comment "Idiomatic property used for structured values." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ;
+ rdfs:label "value" ;
+ rdfs:range rdfs:Resource .
--- /dev/null
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix grddl: <http://www.w3.org/2003/g/data-view#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix vs: <http://www.w3.org/2003/06/sw-vocab-status/ns#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix wot: <http://xmlns.com/wot/0.1/> .
+
+<http://www.w3.org/2000/01/rdf-schema#>
+ dc:title "The RDF Schema vocabulary (RDFS)" ;
+ a owl:Ontology ;
+ rdfs:seeAlso <http://www.w3.org/2000/01/rdf-schema-more> .
+
+rdfs:Class
+ a rdfs:Class ;
+ rdfs:comment "The class of classes." ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "Class" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdfs:Container
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF containers." ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "Container" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdfs:ContainerMembershipProperty
+ a rdfs:Class ;
+ rdfs:comment """The class of container membership properties, rdf:_1, rdf:_2, ...,
+ all of which are sub-properties of 'member'.""" ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "ContainerMembershipProperty" ;
+ rdfs:subClassOf rdf:Property .
+
+rdfs:Datatype
+ a rdfs:Class ;
+ rdfs:comment "The class of RDF datatypes." ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "Datatype" ;
+ rdfs:subClassOf rdfs:Class .
+
+rdfs:Literal
+ a rdfs:Class ;
+ rdfs:comment "The class of literal values, eg. textual strings and integers." ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "Literal" ;
+ rdfs:subClassOf rdfs:Resource .
+
+rdfs:Resource
+ a rdfs:Class ;
+ rdfs:comment "The class resource, everything." ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "Resource" .
+
+rdfs:comment
+ a rdf:Property ;
+ rdfs:comment "A description of the subject resource." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "comment" ;
+ rdfs:range rdfs:Literal .
+
+rdfs:domain
+ a rdf:Property ;
+ rdfs:comment "A domain of the subject property." ;
+ rdfs:domain rdf:Property ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "domain" ;
+ rdfs:range rdfs:Class .
+
+rdfs:isDefinedBy
+ a rdf:Property ;
+ rdfs:comment "The defininition of the subject resource." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "isDefinedBy" ;
+ rdfs:range rdfs:Resource ;
+ rdfs:subPropertyOf rdfs:seeAlso .
+
+rdfs:label
+ a rdf:Property ;
+ rdfs:comment "A human-readable name for the subject." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "label" ;
+ rdfs:range rdfs:Literal .
+
+rdfs:member
+ a rdf:Property ;
+ rdfs:comment "A member of the subject resource." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "member" ;
+ rdfs:range rdfs:Resource .
+
+rdfs:range
+ a rdf:Property ;
+ rdfs:comment "A range of the subject property." ;
+ rdfs:domain rdf:Property ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "range" ;
+ rdfs:range rdfs:Class .
+
+rdfs:seeAlso
+ a rdf:Property ;
+ rdfs:comment "Further information about the subject resource." ;
+ rdfs:domain rdfs:Resource ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "seeAlso" ;
+ rdfs:range rdfs:Resource .
+
+rdfs:subClassOf
+ a rdf:Property ;
+ rdfs:comment "The subject is a subclass of a class." ;
+ rdfs:domain rdfs:Class ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "subClassOf" ;
+ rdfs:range rdfs:Class .
+
+rdfs:subPropertyOf
+ a rdf:Property ;
+ rdfs:comment "The subject is a subproperty of a property." ;
+ rdfs:domain rdf:Property ;
+ rdfs:isDefinedBy <http://www.w3.org/2000/01/rdf-schema#> ;
+ rdfs:label "subPropertyOf" ;
+ rdfs:range rdf:Property .
from datetime import datetime
from htsworkflow.util.rdfhelp import \
+ add_default_schemas, \
blankOrUri, \
+ dcNS, \
dump_model, \
fromTypedNode, \
get_model, \
+ guess_parser, \
+ guess_parser_by_extension, \
load_string_into_model, \
+ owlNS, \
+ rdfNS, \
rdfsNS, \
+ remove_schemas, \
toTypedNode, \
- simplifyUri, \
+ stripNamespace, \
+ simplify_uri, \
sanitize_literal, \
xsdNS
self.assertEqual(fromTypedNode(toTypedNode(long_datetime)),
long_datetime)
- def test_simplify_uri(self):
+ def test_strip_namespace_uri(self):
nsOrg = RDF.NS('example.org/example#')
nsCom = RDF.NS('example.com/example#')
term = 'foo'
node = nsOrg[term]
- self.failUnlessEqual(simplifyUri(nsOrg, node), term)
- self.failUnlessEqual(simplifyUri(nsCom, node), None)
- self.failUnlessEqual(simplifyUri(nsOrg, node.uri), term)
+ self.failUnlessEqual(stripNamespace(nsOrg, node), term)
+ self.failUnlessEqual(stripNamespace(nsCom, node), None)
+ self.failUnlessEqual(stripNamespace(nsOrg, node.uri), term)
- def test_simplify_uri_exceptions(self):
+ def test_strip_namespace_exceptions(self):
nsOrg = RDF.NS('example.org/example#')
nsCom = RDF.NS('example.com/example#')
node = toTypedNode('bad')
- self.failUnlessRaises(ValueError, simplifyUri, nsOrg, node)
- self.failUnlessRaises(ValueError, simplifyUri, nsOrg, nsOrg)
+ self.failUnlessRaises(ValueError, stripNamespace, nsOrg, node)
+ self.failUnlessRaises(ValueError, stripNamespace, nsOrg, nsOrg)
+
+ def test_simplify_uri(self):
+ # simplify_uri should return the final path segment, the fragment,
+ # or the query string of a URI, for str, RDF.Uri and RDF.Node inputs.
+ DATA = [('http://asdf.org/foo/bar', 'bar'),
+ ('http://asdf.org/foo/bar#bleem', 'bleem'),
+ ('http://asdf.org/foo/bar/', 'bar'),
+ ('http://asdf.org/foo/bar?was=foo', 'was=foo')]
+
+ for uri, expected in DATA:
+ self.assertEqual(simplify_uri(uri), expected)
+
+ for uri, expected in DATA:
+ n = RDF.Uri(uri)
+ self.assertEqual(simplify_uri(n), expected)
+
+ for uri, expected in DATA:
+ n = RDF.Node(RDF.Uri(uri))
+ self.assertEqual(simplify_uri(n), expected)
+
+ # decoding literals is questionable
+ n = toTypedNode('http://foo/bar')
+ self.assertRaises(ValueError, simplify_uri, n)
def test_owl_import(self):
path, name = os.path.split(__file__)
- loc = 'file://'+os.path.abspath(path)+'/'
+ #loc = 'file://'+os.path.abspath(path)+'/'
+ loc = os.path.abspath(path)+'/'
model = get_model()
fragment = '''
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
hostile_result = """hi <b>there</b>"""
self.failUnlessEqual(str(hostile_sanitized), hostile_result)
+ def test_guess_parser_from_file(self):
+ # guess_parser_by_extension maps a path's suffix to a parser name;
+ # guess_parser prefers the content type and falls back to the extension.
+ DATA = [
+ ('/a/b/c.rdf', 'rdfxml'),
+ ('/a/b/c.xml', 'rdfxml'),
+ ('/a/b/c.html', 'rdfa'),
+ ('/a/b/c.turtle', 'turtle'),
+ ('http://foo.bar/bleem.turtle', 'turtle')]
+ for path, parser in DATA:
+ self.assertEqual(guess_parser_by_extension(path), parser)
+ self.assertEqual(guess_parser(None, path), parser)
+
+ DATA = [
+ ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
+ ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
+ ('text/html', 'http://a.org/b/c', 'rdfa'),
+ ('text/html', 'http://a.org/b/c.html', 'rdfa'),
+ ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
+ # an uninformative content type with no usable extension yields
+ # the literal fallback string 'guess'
+ ('text/plain', 'http://a.org/b/c', 'guess')
+ ]
+ for contenttype, url, parser in DATA:
+ self.assertEqual(guess_parser(contenttype, url), parser)
+
+ # NOTE(review): suite() below still returns only
+ # makeSuite(TestRDFHelp, 'test'), so this new class is not collected by
+ # suite(); it only runs under a discovery-style runner — TODO confirm.
+ class TestRDFSchemas(unittest.TestCase):
+ def test_rdf_schema(self):
+ """Does it basically work?
+ """
+ model = get_model()
+ self.assertEqual(model.size(), 0)
+ add_default_schemas(model)
+ self.assertGreater(model.size(), 0)
+ remove_schemas(model)
+ self.assertEqual(model.size(), 0)
+
+ def test_included_schemas(self):
+ # spot-check that the bundled rdf/rdfs/owl schema files were loaded
+ model = get_model()
+ add_default_schemas(model)
+
+ # rdf test
+ # NOTE(review): this 's' is never used — get_target below performs
+ # the actual lookup being asserted on.
+ s = RDF.Statement(rdfNS[''], dcNS['title'], None)
+ title = model.get_target(rdfNS[''], dcNS['title'])
+ self.assertTrue(title is not None)
+
+ s = RDF.Statement(rdfNS['Property'], rdfNS['type'], rdfsNS['Class'])
+ self.assertTrue(model.contains_statement(s))
+
+ # rdfs test
+ s = RDF.Statement(rdfsNS['Class'], rdfNS['type'], rdfsNS['Class'])
+ self.assertTrue(model.contains_statement(s))
+
+ # owl test
+ s = RDF.Statement(owlNS['inverseOf'], rdfNS['type'],
+ rdfNS['Property'])
+ self.assertTrue(model.contains_statement(s))
+
def suite():
return unittest.makeSuite(TestRDFHelp, 'test')
--- /dev/null
+import unittest
+
+import RDF
+
+from htsworkflow.util.rdfhelp import get_model, \
+ add_default_schemas, add_schema, load_string_into_model, dump_model
+from htsworkflow.util.rdfns import *
+from htsworkflow.util.rdfinfer import Infer
+
+foafNS = RDF.NS('http://xmlns.com/foaf/0.1/')
+
+MINI_FOAF_ONTOLOGY = """
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+
+
+foaf:Agent
+ a rdfs:Class, owl:Class ;
+ rdfs:comment "An agent (person, group, software or physical artifiact)."@en;
+ rdfs:label "Agent" .
+
+foaf:Person
+ a rdfs:Class, owl:Class, foaf:Agent ;
+ rdfs:label "Person" .
+
+foaf:age
+ a rdf:Property, owl:DatatypeProperty, owl:FunctionalProperty ;
+ rdfs:comment "The age in years of some agent." ;
+ rdfs:domain foaf:Agent ;
+ rdfs:label "age";
+ rdfs:range rdfs:Literal .
+
+foaf:familyName
+ a rdf:Property, owl:DatatypeProperty ;
+ rdfs:comment "Family name of some person." ;
+ rdfs:label "familyName" ;
+ rdfs:domain foaf:Person ;
+ rdfs:range rdfs:Literal .
+
+foaf:firstName
+ a rdf:Property, owl:DatatypeProperty ;
+ rdfs:comment "the first name of a person." ;
+ rdfs:domain foaf:Person ;
+ rdfs:label "firstname" ;
+ rdfs:range rdfs:Literal .
+
+foaf:Document
+ a rdfs:Class, owl:Class ;
+ rdfs:comment "A document." .
+
+foaf:Image
+ a rdfs:Class, owl:Class ;
+ rdfs:comment "An image." ;
+ rdfs:subClassOf foaf:Document .
+
+foaf:depicts
+ a rdf:Property, owl:ObjectProperty ;
+ rdfs:comment "A thing depicted in this representation." ;
+ rdfs:domain foaf:Image ;
+ rdfs:range owl:Thing ;
+ owl:inverseOf foaf:depiction .
+
+foaf:depiction
+ a rdf:Property, owl:ObjectProperty ;
+ rdfs:comment "Depiction of some thing." ;
+ rdfs:domain owl:Thing ;
+ rdfs:range foaf:Image ;
+ owl:inverseOf foaf:depicts .
+"""
+
+FOAF_DATA = """
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+
+_:me
+ foaf:firstName "Diane" ;
+ foaf:familyName "Trout" ;
+ a foaf:Person, owl:Thing ;
+ <http://example.org/other_literal> "value" ;
+ <http://example.org/other_resource> <http://example.org/resource> .
+
+<http://example.org/me.jpg>
+ a foaf:Image, owl:Thing ;
+ foaf:depicts _:me .
+"""
+
+class TestInfer(unittest.TestCase):
+ def setUp(self):
+ self.model = get_model()
+ add_default_schemas(self.model)
+ load_string_into_model(self.model, 'turtle', MINI_FOAF_ONTOLOGY)
+
+ def test_class(self):
+ fooNS = RDF.NS('http://example.org/')
+ load_string_into_model(self.model, 'turtle', FOAF_DATA)
+ inference = Infer(self.model)
+
+ s = RDF.Statement(fooNS['me.jpg'], rdfNS['type'], rdfsNS['Class'])
+ found = list(self.model.find_statements(s))
+ self.assertEqual(len(found), 0)
+ inference._rule_class()
+ s = RDF.Statement(fooNS['me.jpg'], rdfNS['type'], rdfsNS['Class'])
+ found = list(self.model.find_statements(s))
+ self.assertEqual(len(found), 1)
+
+ def test_inverse_of(self):
+ fooNS = RDF.NS('http://example.org/')
+ load_string_into_model(self.model, 'turtle', FOAF_DATA)
+ inference = Infer(self.model)
+ depiction = RDF.Statement(None,
+ foafNS['depiction'],
+ fooNS['me.jpg'])
+ size = self.model.size()
+ found_statements = list(self.model.find_statements(depiction))
+ self.assertEqual(len(found_statements), 0)
+ inference._rule_inverse_of()
+ found_statements = list(self.model.find_statements(depiction))
+ self.assertEqual(len(found_statements), 1)
+
+ # we should've added one statement.
+ self.assertEqual(self.model.size(), size + 1)
+
+ size = self.model.size()
+ inference._rule_inverse_of()
+ # we should already have both versions in our model
+ self.assertEqual(self.model.size(), size)
+
+ def test_validate_types(self):
+ fooNS = RDF.NS('http://example.org/')
+ load_string_into_model(self.model, 'turtle', FOAF_DATA)
+ inference = Infer(self.model)
+
+ errors = list(inference._validate_types())
+ self.assertEqual(len(errors), 0)
+
+ s = RDF.Statement(fooNS['document'],
+ dcNS['title'],
+ RDF.Node("bleem"))
+ self.model.append(s)
+ errors = list(inference._validate_types())
+ self.assertEqual(len(errors), 1)
+
+ def test_validate_undefined_properties(self):
+ fooNS = RDF.NS('http://example.org/')
+ inference = Infer(self.model)
+
+ errors = list(inference._validate_undefined_properties())
+ self.assertEqual(len(errors), 0)
+
+ load_string_into_model(self.model, 'turtle', FOAF_DATA)
+
+ errors = list(inference._validate_undefined_properties())
+ self.assertEqual(len(errors), 2)
+
+
+ def test_validate_undefined_properties(self):
+ fooNS = RDF.NS('http://example.org/')
+ foafNS = RDF.NS('http://xmlns.com/foaf/0.1/')
+ load_string_into_model(self.model, 'turtle', FOAF_DATA)
+ inference = Infer(self.model)
+
+ errors = list(inference._validate_property_types())
+ self.assertEqual(len(errors), 0)
+
+ s = RDF.Statement(fooNS['me.jpg'],
+ foafNS['firstName'],
+ RDF.Node("name"))
+ self.model.append(s)
+ errors = list(inference._validate_property_types())
+ self.assertEqual(len(errors), 1)
+ self.assertTrue(errors[0].startswith('Domain of http://example.org'))
+ del self.model[s]
+
+ errors = list(inference._validate_property_types())
+ self.assertEqual(len(errors), 0)
+ s = RDF.Statement(fooNS['foo.txt'], rdfNS['type'], foafNS['Document'])
+ self.model.append(s)
+ s = RDF.Statement(fooNS['me.jpg'],
+ foafNS['depicts'],
+ foafNS['foo.txt'])
+ self.model.append(s)
+
+ errors = list(inference._validate_property_types())
+ self.assertEqual(len(errors), 1)
+ self.assertTrue(errors[0].startswith('Range of http://example.org'))
+ del self.model[s]
+
+def suite():
+ # collect every test_* method of TestInfer
+ return unittest.makeSuite(TestInfer, 'test')
+
+if __name__ == "__main__":
+ unittest.main(defaultTest='suite')
-from setuptools import setup
+from setuptools import setup, find_packages
from version import get_git_version
setup(
description="Utilities to help manage high-through-put sequencing",
author="Diane Trout, Brandon King",
author_email="diane@caltech.edu",
- packages=["htsworkflow",
- "htsworkflow.automation",
- "htsworkflow.pipelines",
- "htsworkflow.util",
- # django site
- "htsworkflow.frontend",
- "htsworkflow.frontend.analysis",
- "htsworkflow.frontend.eland_config",
- "htsworkflow.frontend.experiments",
- "htsworkflow.frontend.inventory",
- "htsworkflow.frontend.reports",
- "htsworkflow.frontend.samples",
- ],
+ packages=find_packages(),
scripts=[
"scripts/htsw-copier",
"scripts/htsw-eland2bed",
"scripts/htsw-update-archive",
"scripts/htsw-validate",
],
+ # ship the RDF schema files added via MANIFEST.in
+ # (htsworkflow/util/schemas/*.turtle) inside the installed package.
+ # NOTE(review): package_data only covers files inside packages that
+ # find_packages() discovers; confirm the schemas directory is importable
+ # (has an __init__.py) or the .turtle files will be silently dropped.
+ package_data = {
+ '': ['*.turtle']
+ },
)