import logging
import json
import jsonschema
+import numpy
import os
import re
import requests
"""
accession_name = self.get_accession_name(collection)
+ to_create = self.prepare_objects_from_sheet(collection, sheet)
+
created = []
- columns = sheet.columns
- tosubmit = sheet[pandas.isnull(sheet[accession_name])]
+ accessions = []
+ uuids = []
+ for i, new_object in to_create:
+ if new_object:
+ accession = new_object.get('accession')
+ uuid = new_object.get('uuid')
+ description = new_object.get('description')
+
+ posted_object = self.post_object_from_row(
+ collection, i, new_object, dry_run, verbose
+ )
+ created.append(posted_object)
+
+ if posted_object:
+ accession = posted_object.get('accession')
+ uuid = posted_object.get('uuid')
+ description = posted_object.get('description')
+
+ accessions.append(accession)
+ uuids.append(uuid)
+
+ LOGGER.info('row {} ({}) -> {}'.format(
+ (i+2), description, accession))
+ # +2 comes from python row index + 1 to convert to
+ # one based indexing + 1 to account for
+ # row removed by header parsing
+ else:
+ accessions.append(numpy.nan)
+ uuids.append(numpy.nan)
+
+ if accession_name in sheet.columns:
+ sheet[accession_name] = accessions
+ if 'uuid' in sheet.columns:
+ sheet['uuid'] = uuids
- for i in tosubmit.index:
- row = tosubmit.ix[i]
+ return created
+
+ def prepare_objects_from_sheet(self, collection, sheet):
+ accession_name = self.get_accession_name(collection)
+ to_create = []
+ for i, row in sheet.iterrows():
new_object = {}
- for k in columns:
- if pandas.notnull(row[k]):
- name, value = typed_column_parser(k, row[k])
+ for name, value in row.items():
+ if pandas.notnull(value):
+ name, value = typed_column_parser(name, value)
if name is None:
continue
new_object[name] = value
- try:
- self.validate(new_object, collection)
- except jsonschema.ValidationError as e:
- LOGGER.error("Validation error row %s", i)
- raise e
+ if new_object and new_object.get(accession_name) is None:
+ try:
+ self.validate(new_object, collection)
+ except jsonschema.ValidationError as e:
+ LOGGER.error("Validation error row %s", i)
+ raise e
+ to_create.append((i, new_object))
- accession = row[accession_name]
- description = row.get('description', None)
+ else:
+ to_create.append((i, None))
- if not dry_run:
- response = self.post_json(collection, new_object)
- if verbose:
- print("Reponse {}".format(response))
+ return to_create
- obj = response['@graph'][0]
- created.append(obj)
- accession = obj.get('accession')
- uuid = obj.get('uuid')
+ def post_object_from_row(self, collection, i, new_object,
+ dry_run=True, verbose=True):
+ accession_name = self.get_accession_name(collection)
- if accession:
- sheet[accession_name][i] = accession
- else:
- accession = uuid
+ if not dry_run:
+ response = self.post_json(collection, new_object)
+ if verbose:
+ print("Reponse {}".format(response))
- if 'uuid' in columns and pandas.isnull(sheet['uuid'][i]):
- sheet['uuid'][i] = uuid
+ obj = response['@graph'][0]
- print("row {} created: {}".format(i, accession))
- else:
- created.append(new_object)
- LOGGER.info('row {} ({}) -> {}'.format(i, description, accession))
+ accession = obj.get(accession_name)
+ if not accession:
+ accession = obj.get('uuid')
- return created
+ print("row {} created: {}".format(i, accession))
+ return obj
+ else:
+ new_object[accession_name] = 'would create'
+ return new_object
def prepare_url(self, request_url):
'''This attempts to provide some convienence for accessing a URL