Hey,
I've run the Cufflinks pipeline with both the FlyBase release 5 and UCSC release 3 Drosophila melanogaster annotations, and I'm getting strange annotations in the outputs. The example below uses the UCSC annotation, since it ended up working better with Cufflinks overall (but the same problem also occurred with the FlyBase annotation).
CuffMerge entries for "galectin" gene
chr2L Cufflinks exon 21821 22941 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "1"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 22998 23422 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "2"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 74903 75018 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "3"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 75078 76276 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "4"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
UCSC annotation entries for "galectin"
chr2L unknown exon 71757 71804 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 71950 72081 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 72013 72081 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown start_codon 72013 72015 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 72387 72977 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 72603 72977 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown start_codon 72603 72605 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 73485 73692 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 73485 73692 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 73570 73692 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown start_codon 73570 73572 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 73820 73897 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown exon 74129 74572 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown CDS 74501 74572 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown start_codon 74501 74503 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 75280 76095 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown start_codon 75280 75282 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
As you can see, in the annotation galectin starts at 71757 on chr2L and its coding sequence ends at 76098 (the last annotated exon extends to 76211); however, Cufflinks has placed it starting at 21821 and ending at 76276.
Any thoughts on this would be very much appreciated.
Thanks,
Gordon
I've run the Cufflinks pipeline with both the FlyBase release 5 and UCSC release 3 Drosophila melanogaster annotations, and I'm getting strange annotations in the outputs. The example below uses the UCSC annotation, since it ended up working better with Cufflinks overall (but the same problem also occurred with the FlyBase annotation).
CuffMerge entries for "galectin" gene
chr2L Cufflinks exon 21821 22941 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "1"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 22998 23422 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "2"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 74903 75018 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "3"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
chr2L Cufflinks exon 75078 76276 . + . gene_id "XLOC_000002"; transcript_id "TCONS_00000005"; exon_number "4"; gene_name "galectin"; oId "CUFF.59.1"; nearest_ref "NM_001272859"; class_code "j"; tss_id "TSS2";
UCSC annotation entries for "galectin"
chr2L unknown exon 71757 71804 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 71950 72081 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 72013 72081 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown start_codon 72013 72015 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 72387 72977 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 72603 72977 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown start_codon 72603 72605 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 73485 73692 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 73485 73692 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 73570 73692 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown start_codon 73570 73572 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 73820 73897 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown exon 74129 74572 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown CDS 74501 74572 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown start_codon 74501 74503 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 74903 75018 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 74903 75018 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown CDS 75078 76095 . + 1 gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown exon 75078 76211 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown CDS 75280 76095 . + 0 gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown start_codon 75280 75282 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P8803"; transcript_id "NM_001169367"; tss_id "TSS3981";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P12530"; transcript_id "NM_134643"; tss_id "TSS12137";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P18001"; transcript_id "NM_001258884"; tss_id "TSS6545";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P7409"; transcript_id "NM_001169366"; tss_id "TSS12421";
chr2L unknown stop_codon 76096 76098 . + . gene_id "galectin"; gene_name "galectin"; p_id "P9464"; transcript_id "NM_001272859"; tss_id "TSS3981";
As you can see, in the annotation galectin starts at 71757 on chr2L and its coding sequence ends at 76098 (the last annotated exon extends to 76211); however, Cufflinks has placed it starting at 21821 and ending at 76276.
Any thoughts on this would be very much appreciated.
Thanks,
Gordon