# Print the script name with its version, then the usage line and a warning
# that the script usually has to be adapted to the input GFF file.
# (Python 2 print-statement syntax.)
3 print "%s: version 1.0" % sys.argv[0]
5 print "usage: python %s infile.gff outfile.cis\n" % sys.argv[0]
6 print "\tTHIS SCRIPT WILL MOST LIKELY NEED TO BE EDITED FOR YOUR GFF FILE\n"
10 # Cistematic just wants a set of exons labeled "CDS", "5UTR", and "3UTR".
11 # Put the corresponding feature type from your GFF file as the key in the
12 # key:value pairs in the ftypeDict below; the value is the label written out.
13 ftypeDict = {"CDS": "CDS",
15 "five_prime_utr": "5UTR",
16 "three_prime_utr": "3UTR"
# Open the file named by the first command-line argument for reading and the
# file named by the second for writing (mode "w" truncates an existing file).
# NOTE(review): no close() is visible in this excerpt -- confirm both handles
# are closed once processing finishes.
27 infile = open(sys.argv[1])
28 outfile = open(sys.argv[2], "w")
# Split the record on any run of whitespace; fields[2] is used as the
# feature type.
33 fields = line.strip().split()
# Only feature types that appear as keys in ftypeDict are handled here.
35 if fields[2] in ftypeDict:
36 # This part of the code will most likely need to be customized, depending on
37 # how the annotation defines the gene, geneid, and chromosome.
38 # For example, for Anopheles gambiae we have:
39 #chrX VectorBase mRNA 582 16387 . - . ID=vectorbase|AGAP000002-RA; stable_id=AGAP000002-RA.1; Parent=vectorbase|AGAP000002;
40 if fields[2] == "mRNA":
# NOTE(review): because the record is split on whitespace, fields[9] is the
# tenth whitespace-separated token. For the example record above, as written,
# that is the "stable_id=...;" attribute, so geneid would be "AGAP000002-RA.1".
# Confirm this matches the spacing of the attributes in your GFF file.
43 idfields = fields[9].split(";")
# Take the text after the first "=" of the first ";"-separated piece.
44 geneid = idfields[0].split("=")[1]
# fields[3] is parsed as the integer start coordinate.
47 start = int(fields[3])
# Translate the GFF feature type into the label written to the output.
49 ftype = ftypeDict[fields[2]]
# One tab-separated output line: geneid, source immediately followed by the
# integer index, chrom, start, stop, sense, ftype.
50 outline = "%s\t%s%d\t%s\t%d\t%d\t%s\t%s\n" % (geneid, source, index, chrom, start, stop, sense, ftype)
51 outfile.write(outline)