# original gtf
Mh:0000:MhA1_Contig0 Freeze3 CDS 1425 1586 . + 0 transcript_id "MhA1_Contig0.frz3.gene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 1637 1930 . + 0 transcript_id "MhA1_Contig0.frz3.gene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 2143 2275 . + 0 transcript_id "MhA1_Contig0.frz3.gene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 2333 2437 . + 2 transcript_id "MhA1_Contig0.frz3.gene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 2652 2890 . + 2 transcript_id "MhA1_Contig0.frz3.gene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4027 4090 15.97 + 0 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4141 4251 7.12 + 1 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4311 4390 9.54 + 1 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4471 4576 11.80 + 0 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4635 4678 -4.98 + 1 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 4760 4959 14.82 + 0 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 5070 5193 4.65 + 2 transcript_id "MhA1_Contig0.frz3.fgene1";
Mh:0000:MhA1_Contig0 Freeze3 CDS 5243 5274 -3.08 + 0 transcript_id "MhA1_Contig0.frz3.fgene1";

#dexseq_prepare_annotation.py
exons = HTSeq.GenomicArrayOfSets( "auto", stranded=True )
for f in HTSeq.GFF_Reader( gtf_file ):
   if f.type != "CDS":
      continue
   f.attr['transcript_id'] = f.attr['transcript_id'].replace( ":", "_" )
   exons[f.iv] += ( f.attr['transcript_id'], f.attr['transcript_id'] )

#output gtf
Mh:0000:MhA1_Contig0 Mh.gtf aggregate_gene 1425 2890 . + . gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 1425 1586 . + . transcripts "MhA1_Contig0.frz3.gene1"; exonic_part_number "001"; gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 1637 1930 . + . transcripts "MhA1_Contig0.frz3.gene1"; exonic_part_number "002"; gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 2143 2275 . + . transcripts "MhA1_Contig0.frz3.gene1"; exonic_part_number "003"; gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 2333 2437 . + . transcripts "MhA1_Contig0.frz3.gene1"; exonic_part_number "004"; gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 2652 2890 . + . transcripts "MhA1_Contig0.frz3.gene1"; exonic_part_number "005"; gene_id "MhA1_Contig0.frz3.gene1"
Mh:0000:MhA1_Contig0 Mh.gtf aggregate_gene 4027 5970 . + . gene_id "MhA1_Contig0.frz3.fgene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 4027 4090 . + . transcripts "MhA1_Contig0.frz3.fgene1"; exonic_part_number "001"; gene_id "MhA1_Contig0.frz3.fgene1"
Mh:0000:MhA1_Contig0 Mh.gtf exonic_part 4141 4251 . + . transcripts "MhA1_Contig0.frz3.fgene1"; exonic_part_number "002"; gene_id "MhA1_Contig0.frz3.fgene1"

# original gtf
Mt:3452:Mt3.5.1Chr1 E exon 150051 150327 . - . transcript_id "Medtr1g005000.1"; gene_id "Medtr1g005000";
Mt:3452:Mt3.5.1Chr1 E exon 150871 154771 . - . transcript_id "Medtr1g005000.1"; gene_id "Medtr1g005000";
Mt:3452:Mt3.5.1Chr1 E CDS 150051 150327 . - 1 transcript_id "Medtr1g005000.1"; gene_id "Medtr1g005000";
Mt:3452:Mt3.5.1Chr1 E CDS 150871 150875 . - 0 transcript_id "Medtr1g005000.1"; gene_id "Medtr1g005000";
Mt:3452:Mt3.5.1Chr1 E exon 154859 155160 . + . transcript_id "Medtr1g005010.1"; gene_id "Medtr1g005010";
Mt:3452:Mt3.5.1Chr1 E CDS 154903 155139 . + 0 transcript_id "Medtr1g005010.1"; gene_id "Medtr1g005010";
Mt:3452:Mt3.5.1Chr1 E exon 155201 161330 . + . transcript_id "Medtr1g005020.1"; gene_id "Medtr1g005020";
Mt:3452:Mt3.5.1Chr1 E exon 161501 161880 . + . transcript_id "Medtr1g005020.1"; gene_id "Medtr1g005020";
Mt:3452:Mt3.5.1Chr1 E exon 162000 162267 . + . transcript_id "Medtr1g005020.1"; gene_id "Medtr1g005020";

#dexseq_prepare_annotation.py
exons = HTSeq.GenomicArrayOfSets( "auto", stranded=True )
for f in HTSeq.GFF_Reader( gtf_file ):
   if f.type != "exon":
      continue
   f.attr['transcript_id'] = f.attr['transcript_id'].replace( ":", "_" )
   exons[f.iv] += ( f.attr['transcript_id'], f.attr['transcript_id'] )

#output gtf
Mt:3452:Mt3.5.1Chr1 Mt.gtf aggregate_gene 150051 154771 . - . gene_id "Medtr1g005000.1"
Mt:3452:Mt3.5.1Chr1 Mt.gtf exonic_part 150051 150327 . - . transcripts "Medtr1g005000.1"; exonic_part_number "001"; gene_id "Medtr1g005000.1"
Mt:3452:Mt3.5.1Chr1 Mt.gtf exonic_part 150871 154771 . - . transcripts "Medtr1g005000.1"; exonic_part_number "002"; gene_id "Medtr1g005000.1"
Mt:3452:Mt3.5.1Chr1 Mt.gtf aggregate_gene 154859 155160 . + . gene_id "Medtr1g005010.1"
Mt:3452:Mt3.5.1Chr1 Mt.gtf exonic_part 154859 155160 . + . transcripts "Medtr1g005010.1"; exonic_part_number "001"; gene_id "Medtr1g005010.1"
Mt:3452:Mt3.5.1Chr1 Mt.gtf aggregate_gene 155201 162479 . + . gene_id "Medtr1g005020.1"