# Module-level logger. The explicit name 'ImportEncoded' (rather than
# __name__) is kept so any logging configuration keyed on it still applies.
logger = logging.getLogger('ImportEncoded')
9 from sqlalchemy.ext.declarative import declarative_base
10 from sqlalchemy import Column, Integer, String, create_engine
11 from sqlalchemy.dialects.postgresql import UUID, JSONB
12 from sqlalchemy.orm import sessionmaker
# Declarative base class for the ORM model(s) defined below.
Base = declarative_base()
    # NOTE(review): the `class Item(Base):` header — and possibly further
    # attributes such as `uri` (referenced by create_item) — lie outside
    # this excerpt; reconcile with the full file before editing.
    __tablename__ = 'item'

    # Primary key; assumes every source row carries a unique uuid — TODO confirm.
    uuid = Column(UUID, primary_key=True)

    object_type = Column(String)  # first '@type' entry of the source row
    payload = Column(JSONB)       # full source row stored as JSONB
    # Use the first of the row's JSON-LD '@type' values as the item type.
    object_type = row['@type'][0]
    # NOTE(review): `uri`, `uuid` and `payload` are bound on lines missing
    # from this excerpt (the `def create_item(row):` header is also outside
    # this view) — confirm their derivation against the full file.
    return Item(uri=uri, uuid=uuid, object_type=object_type, payload=payload)
def create_session(engine):
    """Return a new SQLAlchemy ``Session`` bound to *engine*.

    Bug fix: ``sessionmaker(bind=engine)`` returns a session *factory*,
    not a session — the factory must be called to obtain a usable
    ``Session`` instance (cf. ``load_dump``, which does exactly that).

    :param engine: an SQLAlchemy ``Engine`` to bind the session to
    :return: a newly created ``Session``
    """
    factory = sessionmaker(bind=engine)
    return factory()
def load_data(session, graph):
    # Stage one Item per unique primary key from *graph* onto *session*,
    # collecting colliding rows; NOTE(review): several lines of this
    # function are missing from this excerpt — the initialisation of
    # `seen_pkeys` and `duplicates`, the binding of `obj_id` from `row`,
    # the `else:` pairing with the `setdefault` call below, and the final
    # `return`. Reconcile with the full file before editing.
    for i, row in enumerate(graph):
        if obj_id not in seen_pkeys:
            # First occurrence of this primary key: stage a new Item.
            session.add(create_item(row))
            seen_pkeys.add(obj_id)
        # (presumably inside an `else:` branch — TODO confirm) record
        # colliding rows keyed by their shared primary key.
        duplicates.setdefault(obj_id, []).append(row)

        # Progress report every 10000 rows.
        # TODO(review): uses print() while the rest of the file logs via
        # `logger` — consider logger.info here.
        if (i + 1) % 10000 == 0:
            print("{} of {}".format(i+1, len(graph)))
def load_dump(filename, db_url='postgresql://felcat.caltech.edu/encoded'):
    """Load an encoded JSON dump file into the database.

    Creates the schema if needed, parses *filename* as JSON, stages every
    object of its '@graph' list via ``load_data``, commits, and appends
    any primary-key collisions to ``bad.txt``.

    :param filename: path of the JSON dump file to load
    :param db_url: SQLAlchemy database URL; defaults to the previously
        hard-coded encoded database so existing callers are unaffected
    """
    logger.info("Creating schema")
    engine = create_engine(db_url)
    Base.metadata.create_all(engine)
    sessionfactory = sessionmaker(bind=engine)
    session = sessionfactory()

    logger.info("Parsing %s", filename)
    with open(filename, 'r') as instream:
        data = json.load(instream)

    graph = data['@graph']
    # was logging.info (root logger) — use the module logger consistently
    logger.info("Loading")
    collisions = load_data(session, graph)
    # Commit any rows still pending; staged Items were otherwise never
    # visibly flushed (harmless if load_data already committed).
    session.commit()

    with open('bad.txt', 'a') as outstream:
        outstream.write(pprint.pformat(collisions))
def main(cmdline=None):
    """Command-line entry point: load each named JSON dump file.

    :param cmdline: argument list (for testing); ``None`` means
        ``sys.argv[1:]`` per argparse's default behaviour
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename', nargs=1, help='json dump file to load')
    args = parser.parse_args(cmdline)

    logging.basicConfig(level=logging.DEBUG)
    for filename in args.filename:
        # NOTE(review): the loop body is missing from this excerpt;
        # loading each named dump is the evident intent — confirm.
        load_dump(filename)
88 if __name__ == '__main__':