Hi all!
I'm currently working with a multi entry genbank file and two dataframes in order to add new qualifiers to the genbank file via its key/value system.
Each entry is a contig, and so far I am able to add new keys on top of the already existing "locus_tag" and "translation" for each entry. However, I am having some difficulties adding values from the dataframes to each contigs.
Each dataframe is made of 3 columns but holds 6000+ lines of data. I am able to insert one specific column into the genbank file, but these 6000 lines print for every contig.
I've tried making a for loop, but the 6000 lines continue to print and I am not sure what else to do. Any help would be greatly appreciated!
This is the code I am working with:
import os
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
import pandas as pd
df1 = pd.read_csv("-f_besthit.csv")
df2 = pd.read_csv("-f_filtered.csv")
annotation_handle = open("META.gbk","r")
recs = [rec for rec in SeqIO.parse("META.gbk", "genbank")]
my_start_pos = (2)
my_end_pos = (6)
my_feature_location = FeatureLocation(my_start_pos,my_end_pos)
for rec in recs:
my_feature_type = "CDS"
full_product={"full_product":"df_best[full_product]","complement":"(1..423)", "locus_tag":"contig", "besthit":"df_fil[besthit]"}
my_feature = SeqFeature(my_feature_location,type=my_feature_type, qualifiers=full_product)
besthit={"besthit":"df_fil[besthit]"}
my_feature_one = SeqFeature(my_feature_location,type=my_feature_type, qualifiers=besthit)
rec.features.append(my_feature)
feats = [feat for feat in rec.features if feat.type == "CDS"]
for feat in feats:
print(feat)
for record in SeqIO.parse(annotation_handle,"genbank"):
a = len(record.features)
for_rast = open("META.gbk","w")
x = 0
final_features = []
for f in record.features:
if f.type == "CDS":
f.qualifiers["full_product"] = "%s_%s" % (df2.loc[:,"besthit"], x+1)
x += 1
for f in record.features:
if f.qualifiers["full_product"] == df2.loc[:"orf"]:
final_features.append(f)
else:
pass
record.features = final_features
with open("META.gbk","w") as for_rast:
SeqIO.write(record, for_rast, "genbank")
I'm currently working with a multi entry genbank file and two dataframes in order to add new qualifiers to the genbank file via its key/value system.
Each entry is a contig, and so far I am able to add new keys on top of the already existing "locus_tag" and "translation" for each entry. However, I am having some difficulties adding values from the dataframes to each contigs.
Each dataframe is made of 3 columns but holds 6000+ lines of data. I am able to insert one specific column into the genbank file, but these 6000 lines print for every contig.
I've tried making a for loop, but the 6000 lines continue to print and I am not sure what else to do. Any help would be greatly appreciated!
This is the code I am working with:
import os
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
import pandas as pd
df1 = pd.read_csv("-f_besthit.csv")
df2 = pd.read_csv("-f_filtered.csv")
annotation_handle = open("META.gbk","r")
recs = [rec for rec in SeqIO.parse("META.gbk", "genbank")]
my_start_pos = (2)
my_end_pos = (6)
my_feature_location = FeatureLocation(my_start_pos,my_end_pos)
for rec in recs:
my_feature_type = "CDS"
full_product={"full_product":"df_best[full_product]","complement":"(1..423)", "locus_tag":"contig", "besthit":"df_fil[besthit]"}
my_feature = SeqFeature(my_feature_location,type=my_feature_type, qualifiers=full_product)
besthit={"besthit":"df_fil[besthit]"}
my_feature_one = SeqFeature(my_feature_location,type=my_feature_type, qualifiers=besthit)
rec.features.append(my_feature)
feats = [feat for feat in rec.features if feat.type == "CDS"]
for feat in feats:
print(feat)
for record in SeqIO.parse(annotation_handle,"genbank"):
a = len(record.features)
for_rast = open("META.gbk","w")
x = 0
final_features = []
for f in record.features:
if f.type == "CDS":
f.qualifiers["full_product"] = "%s_%s" % (df2.loc[:,"besthit"], x+1)
x += 1
for f in record.features:
if f.qualifiers["full_product"] == df2.loc[:"orf"]:
final_features.append(f)
else:
pass
record.features = final_features
with open("META.gbk","w") as for_rast:
SeqIO.write(record, for_rast, "genbank")