Update schema tests for upstream's release 33
[htsworkflow.git] / htsworkflow / submission / test / library.json
1 {
2     "title": "Library",
3     "description": "Schema for submitting a molecular library.",
4     "id": "/profiles/library.json",
5     "$schema": "http://json-schema.org/draft-04/schema#",
6     "type": "object",
7     "required": [ "award", "lab", "nucleic_acid_term_id"],
8     "identifyingProperties": ["uuid","accession", "aliases"],
9     "additionalProperties": false,
10     "mixinProperties": [
11         { "$ref": "mixins.json#/schema_version" },
12         { "$ref": "mixins.json#/uuid" },
13         { "$ref": "mixins.json#/accession" },
14         { "$ref": "mixins.json#/aliases" },
15         { "$ref": "mixins.json#/attribution" },
16         { "$ref": "mixins.json#/accessioned_status" },
17         { "$ref": "mixins.json#/submitted" },
18         { "$ref": "mixins.json#/source" },
19         { "$ref": "mixins.json#/product_id" },
20         { "$ref": "mixins.json#/lot_id" },
21         { "$ref": "mixins.json#/notes" }
22     ],
23     "facets": {
24         "status": {
25             "title": "Library status",
26             "type": "string"
27         }
28     },
29     "dependencies": {
30         "nucleic_acid_term_id": ["nucleic_acid_term_name"],
31         "nucleic_acid_term_name": ["nucleic_acid_term_id"],
32         "nucleic_acid_starting_quantity_units": ["nucleic_acid_starting_quantity"],
33         "nucleic_acid_starting_quantity": ["nucleic_acid_starting_quantity_units"],
34         "depleted_in_term_name": ["depleted_in_term_id"],
35         "depleted_in_term_id": ["depleted_in_term_name"],
36         "product_id": ["source"],
37         "lot_id": ["source", "product_id"]
38     },
39     "properties": {
40         "schema_version": {
41             "default": "4"
42         },
43         "accession": {
44             "accessionType": "LB"
45         },
46         "spikeins_used": {
47             "title": "Spike-ins datasets used",
48             "description": "The datasets containing the fasta and the concentrations of the library spike-ins.",
49             "type": "array",
50             "default": [],
51             "items" : {
52                 "title": "A spike-ins dataset.",
53                 "description": "A specific spike-ins type dataset",
54                 "comment": "See dataset.json for available identifiers.",
55                 "type": "string",
56                 "linkTo": "Dataset"
57             }
58         },
59         "biosample": {
60             "title": "Biosample",
61             "description": "The biosample that nucleic acid was isolated from to generate the library.",
62             "comment": "See biosample.json for available identifiers.",
63             "type": "string",
64             "linkTo": "Biosample"
65         },
66         "product_id": {
67             "description": "The product identifier provided by the vendor, for nucleic acids or proteins purchased directly from a vendor (e.g. total RNA)."
68         },
69         "lot_id":{
70             "description": "The lot identifier provided by the vendor, for nucleic acids or proteins purchased directly from a vendor (e.g. total RNA)."
71         },
72         "source": {
73             "description": "The vendor, for nucleic acids or proteins purchased directly from a vendor (e.g. total RNA)."
74         },
75         "nucleic_acid_term_name": {
76             "@id": null,
77             "title": "Molecule term",
78             "description": "SO (Sequence Ontology) term best matching the molecule isolated to generate the library (e.g. 'RNA' for a total RNA library, even if that library is subsequently reverse transcribed for DNA sequencing).",
79             "type": "string",
80             "enum": [
81                 "DNA",
82                 "RNA",
83                 "polyadenylated mRNA",
84                 "miRNA",
85                 "protein"
86             ]
87         },
88         "nucleic_acid_term_id": {
89             "@type": "@id",
90             "title": "Molecule ID",
91             "description": "SO (Sequence Ontology) identifier best matching the nucleic acid isolated to generate the library.",
92             "comment": "Based on the choice in nucleic_acid_term_name use the following guide: DNA - SO:0000352, RNA - SO:0000356, polyadenylated mRNA - SO:0000871, miRNA - SO:0000276 or protein - SO:0000104",
93             "type": "string",
94             "enum": [
95                 "SO:0000352",
96                 "SO:0000356",
97                 "SO:0000871",
98                 "SO:0000276",
99                 "SO:0000104"
100             ]
101         },
102         "documents": {
103             "title": "Protocol documents",
104             "description": "Documents that describe the preparation of the library.",
105             "type": "array",
106             "default": [],
107             "items": {
108                 "title": "Protocol document",
109                 "description": "A document that describes the preparation of the library.",
110                 "comment": "See document.json for available identifiers.",
111                 "type": "string",
112                 "linkTo": "Document"
113             }
114         },
115         "dbxrefs": {
116             "@type": "@id",
117             "rdfs:subPropertyOf": "rdfs:seeAlso",
118             "title": "External identifiers",
119             "description": "Unique identifiers from external resources.",
120             "type": "array",
121             "default": [],
122             "items": {
123                 "title": "External identifier",
124                 "description": "A unique identifier from an external resource.",
125                 "type":  "string",
126                 "pattern": "^GEO:GSM\\d+$"
127             }
128         },
129         "nucleic_acid_starting_quantity": {
130             "title": "Nucleic acid starting quantity",
131             "description": "The starting amount of nucleic acid before selection and purification.",
132             "type": "string",
133             "pattern": "[0-9]+"
134         },
135         "nucleic_acid_starting_quantity_units": {
136             "title": "Nucleic acid starting quantity units",
137             "description": "The units used for starting amount of nucleic acid.",
138             "type": "string",
139             "enum": [
140                 "cells",
141                 "cell-equivalent",
142                 "µg",
143                 "ng",
144                 "pg",
145                 "mg"
146             ]
147         },
148         "extraction_method": {
149             "title": "Extraction method",
150             "description": "A short description or reference of the nucleic acid extraction protocol used in library preparation, if applicable.",
151             "type": "string",
152             "XXXenum": [
153                 "miRNeasy Mini kit (QIAGEN cat#:217004)",
154                 "Trizol (LifeTech cat#: 15596-018)",
155                 "Ambion mirVana",
156                 "Qiagen #74204",
157                 "QIAGEN DNeasy Blood & Tissue Kit",
158                 "see document",
159                 "n/a"
160             ],
161             "format": "semi-controlled"
162         },
163         "fragmentation_method": {
164             "title": "Fragmentation method",
165             "description": "A short description or reference of the nucleic acid fragmentation protocol used in library preparation, if applicable.",
166             "type": "string",
167             "enum": [
168                 "chemical (generic)",
169                 "chemical (DnaseI)",
170                 "chemical (HindIII/DpnII restriction)",
171                 "chemical (Tn5 transposase)",
172                 "chemical (micrococcal nuclease)",
173                 "chemical (Illumina TruSeq)",
174                 "chemical (Nextera tagmentation)",
175                 "shearing (generic)",
176                 "shearing (Covaris generic)",
177                 "shearing (Covaris S2)",
178                 "sonication (generic)",
179                 "sonication (Bioruptor generic)",
180                 "sonication (Bioruptor Plus)",
181                 "sonication (Bioruptor Twin)",
182                 "sonication (generic microtip)",
183                 "sonication (Branson Sonifier 450)",
184                 "shearing (Covaris LE Series)",
185                 "see document",
186                 "none",
187                 "n/a"
188             ]
189         },
190         "fragmentation_date":{
191             "title": "Fragmentation date",
192             "description": "The date that the nucleic acid was fragmented.",
193             "comment": "Date can be submitted as YYYY-MM-DD or YYYY-MM-DDTHH:MM:SSTZD (TZD is the time zone designator; use Z to express time in UTC or for time expressed in local time add a time zone offset from UTC +HH:MM or -HH:MM).",
194             "type": "string",
195             "anyOf": [
196                 {"format": "date-time"},
197                 {"format": "date"}
198             ]
199         },
200         "library_size_selection_method": {
201             "title": "Size selection method",
202             "description": "A short description or reference of the size selection protocol used in library preparation, if applicable.",
203             "type": "string",
204             "XXXenum": [
205                 "gel",
206                 "see document",
207                 "SPRI beads"
208             ],
209             "format": "semi-controlled"
210         },
211         "lysis_method": {
212             "title": "Lysis method",
213             "description": "A short description or reference of the cell lysis protocol used in library preparation, if applicable.",
214             "type": "string",
215             "XXXenum": [
216                 "miRNeasy Mini kit (QIAGEN cat#:217004)",
217                 "Trizol (LifeTech cat#: 15596-018)",
218                 "Ambion mirVana",
219                 "Qiagen #74204",
220                 "QIAGEN DNeasy Blood & Tissue Kit",
221                 "see document",
222                 "n/a"
223             ],
224             "format": "semi-controlled"
225         },
226         "crosslinking_method": {
227             "title": "Crosslinking method",
228             "description": "A short description or reference of the crosslinking protocol used in library preparation, if applicable.",
229             "type": "string",
230             "enum": [
231                 "formaldehyde",
232                 "ultraviolet irradiation"
233             ]
234         },
235         "size_range": {
236             "title": "Size range",
237             "description": "The measured size range of the purified nucleic acid, in bp.",
238             "type": "string",
239             "pattern": "(^[0-9]+-[0-9]+$|^[<>][0-9]+$)"
240         },
241         "strand_specificity": {
242             "title": "Strand specificity",
243             "description": "Whether the library was prepared using a strand-specific protocol.",
244             "type": "boolean",
245             "default": false
246         },
247         "treatments": {
248             "title": "Treatments",
249             "type": "array",
250             "default": [],
251             "items": {
252                 "title": "Treatment",
253                 "comment": "See treatment.json for available identifiers.",
254                 "type": "string",
255                 "linkTo": "Treatment"
256             }
257         },
258         "depleted_in_term_name": {
259             "@id": null,
260             "type": "array",
261             "title": "Depleted in term",
262             "description": "SO (Sequence Ontology) term best matching the nucleic acid that was diminished from the library.",
263             "default": [],
264             "items": {
265                 "type": "string",
266                 "enum": [
267                     "rRNA",
268                     "polyadenylated mRNA",
269                     "capped mRNA"
270                 ]
271             }
272         },
273         "depleted_in_term_id": {
274             "@type": "@id",
275             "type": "array",
276             "title": "Depleted in ID",
277             "description": "SO (Sequence Ontology) identifier best matching the nucleic acid that was diminished from the library.",
278             "comment": "Based on the choice in depleted_in_term_name use the following guide: rRNA - SO:0000252, polyadenylated mRNA - SO:0000871 or capped mRNA - SO:0000862",
279             "default": [],
280             "items": {
281                 "type": "string",
282                 "enum": [
283                     "SO:0000252",
284                     "SO:0000871",
285                     "SO:0000862"
286                 ]
287             }
288         }
289     },
290     "columns": {
291         "accession": {
292             "title": "Accession",
293             "type": "string"
294         },
295         "award": {
296             "title": "Award",
297             "type": "string"
298         },
299         "lab": {
300             "title": "Lab",
301             "type": "string"
302         },
303         "biosample.biosample_term_name": {
304             "title": "Biosample",
305             "type": "string"
306         },
307         "biosample.organism.name": {
308             "title": "Species",
309             "type": "string"
310         },
311         "nucleic_acid_term_name": {
312             "title": "Nucleic Acid Term Name",
313             "type": "string"
314         }
315     },
316     "boost_values": {
317         "accession": 1.0,
318         "alternate_accessions": 1.0,
319         "aliases": 1.0,
320         "biosample.accession": 1.0,
321         "biosample.alternate_accessions": 1.0,
322         "biosample.aliases": 1.0,
323         "biosample.donor.accession": 1.0,
324         "biosample.donor.organism.name": 1.0
325     },
326     "changelog": "/profiles/changelogs/library.md"
327 }