# redland rdf lib
import RDF
import sys
-import urllib
-import urlparse
+import urllib.request, urllib.parse, urllib.error
+import urllib.parse
if not 'DJANGO_SETTINGS_MODULE' in os.environ:
os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings'
from htsworkflow.submission import daf, ucsc
from htsworkflow.util import api
+from htsworkflow.util.rdfns import *
from htsworkflow.util.rdfhelp import \
- dafTermOntology, \
- dublinCoreNS, \
get_model, \
get_serializer, \
sparql_query, \
submissionOntology, \
libraryOntology, \
- load_into_model, \
- rdfNS, \
- rdfsNS, \
- xsdNS
+ load_into_model
TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']
if opts.print_rdf:
serializer = get_serializer(name=opts.rdf_parser_name)
- print serializer.serialize_model_to_string(model)
+ print(serializer.serialize_model_to_string(model))
def make_parser():
def load_my_submissions(model, limit=None, cookie=None):
- """Parse all the submissions from UCSC into model
+ """Parse all of my submissions from encodesubmit into model
It will look at the global USER_URL to figure out whose submissions to scrape.
cookie contains the session cookie; if None, it will attempt to log in.
"""
for row in results:
subid = row['subid']
name = row['name']
- print "# {0}".format(name)
- print "<{0}>".format(subid.uri)
- print " encodeSubmit:library_urn "\
- "<http://jumpgate.caltech.edu/library/> ."
- print ""
+ print("# {0}".format(name))
+ print("<{0}>".format(subid.uri))
+ print(" encodeSubmit:library_urn "\
+ "<http://jumpgate.caltech.edu/library/> .")
+ print("")
def find_submissions_with_no_library(model):
missing_lib_query_text = """
load_library_detail(model, library_urn)
def user_library_id_to_library_urn(library_id):
- split_url = urlparse.urlsplit(library_id)
+ split_url = urllib.parse.urlsplit(library_id)
if len(split_url.scheme) == 0:
return LIBRARY_NS[library_id]
else:
if file_index is None:
return
- for filename, attributes in file_index.items():
+ lib_term = submissionOntology['library_urn']
+ sub_term = submissionOntology['submission_urn']
+ for filename, attributes in list(file_index.items()):
s = RDF.Node(RDF.Uri(filename))
model.add_statement(
RDF.Statement(s, TYPE_N, submissionOntology['ucsc_track']))
- for name, value in attributes.items():
+ for name, value in list(attributes.items()):
p = RDF.Node(DCC_NS[name])
o = RDF.Node(value)
model.add_statement(RDF.Statement(s,p,o))
+ if name.lower() == 'labexpid':
+ model.add_statement(
+ RDF.Statement(s, lib_term, LIBRARY_NS[value+'/']))
+ elif name.lower() == 'subid':
+ sub_url = RDF.Uri(submission_view_url(value))
+ model.add_statement(
+ RDF.Statement(s, sub_term, sub_url))
def load_library_detail(model, libraryUrn):
try:
body = get_url_as_text(str(libraryUrn.uri), 'GET')
rdfaParser.parse_string_into_model(model, body, libraryUrn.uri)
- except httplib2.HttpLib2ErrorWithResponse, e:
+ except httplib2.HttpLib2ErrorWithResponse as e:
LOGGER.error(str(e))
elif len(results) == 1:
pass # Assuming that a loaded dataset has one record
response, content = http.request(LOGIN_URL,
'POST',
headers=headers,
- body=urllib.urlencode(credentials))
+ body=urllib.parse.urlencode(credentials))
LOGGER.debug("Login to {0}, status {1}".format(LOGIN_URL,
response['status']))
for lib_id, subobj in subl:
libraries.setdefault(lib_id, []).append(subobj)
- for submission in libraries.values():
+ for submission in list(libraries.values()):
submission.sort(key=attrgetter('date'), reverse=True)
return libraries