import RDF
import sys
import urllib
+import urlparse
from htsworkflow.util import api
+from htsworkflow.util.rdfhelp import \
+ dublinCoreNS, \
+ get_model, \
+ get_serializer, \
+ sparql_query, \
+ submissionOntology, \
+ libraryOntology, \
+ load_into_model, \
+ rdfNS, \
+ rdfsNS, \
+ xsdNS
+
+# URL mappings
+libraryNS = RDF.NS("http://jumpgate.caltech.edu/library/")
+
+from htsworkflow.submission.ucsc import submission_view_url, UCSCEncodePipeline
+download_ddf = UCSCEncodePipeline+"download_ddf#"
+ddfNS = RDF.NS(download_ddf)
+
DBDIR = os.path.expanduser("~diane/proj/submission")
logger = logging.getLogger("encode_find")
-libraryNS = RDF.NS("http://jumpgate.caltech.edu/library/")
-submissionNS = RDF.NS("http://encodesubmit.ucsc.edu/pipeline/show/")
-submitOntologyNS = RDF.NS("http://jumpgate.caltech.edu/wiki/UCSCSubmissionOntology#")
-ddfNS = RDF.NS("http://encodesubmit.ucsc.edu/pipeline/download_ddf#")
-libOntNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
-
-dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
-rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
-rdfsNS= RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
-xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
-
LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'
htswapi = api.HtswApi(opts.host, htsw_authdata)
cookie = None
- model = get_model(opts.load_model)
+ model = get_model(opts.load_model, DBDIR)
if opts.load_rdf is not None:
- load_into_model(model, opts.rdf_parser_name, opts.load_rdf)
+ ns_uri = submissionOntology[''].uri
+ load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)
if opts.update:
cookie = login(cookie=cookie)
missing = find_submissions_with_no_library(model)
if opts.print_rdf:
- serializer = RDF.Serializer(name=opts.rdf_parser_name)
+ serializer = get_serializer(name=opts.rdf_parser_name)
print serializer.serialize_model_to_string(model)
return parser
-def get_model(model_name=None):
- if model_name is None:
- storage = RDF.MemoryStorage()
- else:
- storage = RDF.HashStorage(model_name,
- options="hash-type='bdb',dir='{0}'".format(DBDIR))
- model = RDF.Model(storage)
- return model
-
def load_my_submissions(model, cookie=None):
if cookie is None:
cookie = login()
# first record is header
tr = tr.findNext()
TypeN = rdfsNS['type']
- NameN = submitOntologyNS['name']
- SpeciesN = submitOntologyNS['species']
- LibraryURN = submitOntologyNS['library_urn']
+ NameN = submissionOntology['name']
+ SpeciesN = submissionOntology['species']
+ LibraryURN = submissionOntology['library_urn']
while tr is not None:
td = tr.findAll('td')
if td is not None and len(td) > 1:
subUrnText = td[0].contents[0].contents[0].encode(CHARSET)
- subUrn = submissionNS[subUrnText]
+ subUrn = RDF.Uri(submission_view_url(subUrnText))
- add_stmt(model, subUrn, TypeN, submitOntologyNS['Submission'])
+ add_stmt(model, subUrn, TypeN, submissionOntology['Submission'])
name = get_contents(td[4])
add_stmt(model, subUrn, NameN, name)
def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
"""Add a link from a UCSC submission to woldlab library if needed
"""
- libraryUrn = libraryNS[library_id]
+ libraryUrn = libraryNS[library_id+'/']
query = RDF.Statement(submissionUrn, predicate, libraryUrn)
if not model.contains_statement(query):
- link = RDF.Statement(submissionUrn, predicate, libraryNS[library_id])
+ link = RDF.Statement(submissionUrn, predicate, libraryUrn)
logger.info("Adding Sub -> Lib link: {0}".format(link))
model.add_statement(link)
else:
?subid submissionOntology:name ?name
OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
FILTER (!bound(?libid))
-}}""".format(submissionOntology=submitOntologyNS[''].uri)
+}}""".format(submissionOntology=submissionOntology[''].uri)
)
results = missing_lib_query.execute(model)
def add_submission_creation_date(model, subUrn, cookie):
# in theory the submission page might have more information on it.
- creationDateN = libOntNS['date']
+ creationDateN = libraryOntology['date']
dateTimeType = xsdNS['dateTime']
query = RDF.Statement(subUrn, creationDateN, None)
creation_dates = list(model.find_statements(query))
if len(creation_dates) == 0:
logger.info("Getting creation date for: {0}".format(str(subUrn)))
- soup = get_url_as_soup(str(subUrn.uri), 'GET', cookie)
+ soup = get_url_as_soup(str(subUrn), 'GET', cookie)
created_label = soup.find(text="Created: ")
if created_label:
created_date = get_date_contents(created_label.next)
logger.debug("Found creation date for: {0}".format(str(subUrn)))
def update_submission_detail(model, subUrn, status, recent_update, cookie):
- HasStatusN = submitOntologyNS['has_status']
- StatusN = submitOntologyNS['status']
- LastModifyN = submitOntologyNS['last_modify_date']
+ HasStatusN = submissionOntology['has_status']
+ StatusN = submissionOntology['status']
+ LastModifyN = submissionOntology['last_modify_date']
status_nodes_query = RDF.Statement(subUrn, HasStatusN, None)
status_nodes = list(model.find_statements(status_nodes_query))
download_ddf_url = str(subUrn).replace('show', 'download_ddf')
ddfUrn = RDF.Uri(download_ddf_url)
- status_is_ddf = RDF.Statement(statusNode, TypeN, ddfNS['ddf'])
+ status_is_ddf = RDF.Statement(statusNode, TypeN, ddfNS[''])
if not model.contains_statement(status_is_ddf):
logging.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
for f in files:
fileNode = RDF.Node()
- add_stmt(model, statusNode, submitOntologyNS['has_file'], fileNode)
+ add_stmt(model, statusNode, submissionOntology['has_file'], fileNode)
add_stmt(model, fileNode, rdfsNS['type'], ddfNS['file'])
add_stmt(model, fileNode, ddfNS['filename'], f)
rdfaParser = RDF.Parser(name='rdfa')
print encodeUrl
rdfaParser.parse_into_model(model, encodeUrl)
- query = RDF.Statement(None, libOntNS['library_id'], None)
+ query = RDF.Statement(None, libraryOntology['library_id'], None)
libraries = model.find_statements(query)
for statement in libraries:
libraryUrn = statement.subject
"""Grab detail information from library page
"""
rdfaParser = RDF.Parser(name='rdfa')
- query = RDF.Statement(libraryUrn, libOntNS['date'], None)
+ query = RDF.Statement(libraryUrn, libraryOntology['date'], None)
results = list(model.find_statements(query))
if len(results) == 0:
logger.info("Loading {0}".format(str(libraryUrn)))
else:
return None
-def sparql_query(model, query_filename):
- """Execute sparql query from file
- """
- query_body = open(query_filename,'r').read()
- query = RDF.SPARQLQuery(query_body)
- results = query.execute(model)
- for row in results:
- output = []
- for k,v in row.items()[::-1]:
- print "{0}: {1}".format(k,v)
- print
-
-def load_into_model(model, parser_name, filename):
- if not os.path.exists(filename):
- raise IOError("Can't find {0}".format(filename))
-
- data = open(filename, 'r').read()
- rdf_parser = RDF.Parser(name=parser_name)
- ns_uri = submitOntologyNS[''].uri
- rdf_parser.parse_string_into_model(model, data, ns_uri)
-
def add_stmt(model, subject, predicate, object):
"""Convenience: create an RDF Statement and add it to a model
"""
RDF.Statement(subject, predicate, object)
)
+
def login(cookie=None):
"""Login if we don't have a cookie
"""
for d in freezes:
report.append('<td>')
for s in batched.get(d, []):
- show_url = submissionNS[s.subid].uri
+ show_url = submission_view_url(s.subid)
subid = '<a href="{0}">{1}</a>'.format(show_url, s.subid)
report.append("{0}:{1}".format(subid, s.status))
report.append('</td>')