optimization_of_DNA-library.../plasmid.R

401 lines
17 KiB
R
Raw Normal View History

2023-12-14 17:51:45 +03:00
library(ggplot2)
library(stringr)
#####
#This part of the script plots alignment statistics. Only plasmid-aligned reverse reads are used.
#For every sample an equally sized random subset of reads is drawn (a size chosen to fit the sample with the fewest reads).
#####
#Uploading data in R
# Paths of the per-sample tables, one per row in column V1 of the list file.
table_files <- as.character(read.table("names_of_custom_plasmid.txt")[, "V1"])
if (length(table_files) == 0) {
  stop("names_of_custom_plasmid.txt lists no table files", call. = FALSE)
}

# Sample name = file name without the "custom_" prefix and the table suffix.
samples_ecoli <- gsub(
  "_plasmid_mapq20_table.txt", "",
  gsub("custom_", "", table_files, fixed = TRUE),
  fixed = TRUE
)

# Read every table and keep only the reverse reads (Read == 2).
reverse_reads <- lapply(table_files, function(path) {
  reads <- read.csv(path, sep = ";", header = TRUE)
  reads[which(reads$Read == 2), , drop = FALSE]
})

# Every sample contributes the same number of reads: 100000, or fewer when
# the smallest sample has fewer reverse reads (sampling more rows than exist
# without replacement is an error).
max_reads_per_sample <- 100000
reads_per_sample <- min(
  max_reads_per_sample,
  vapply(reverse_reads, nrow, integer(1))
)
if (reads_per_sample == 0) {
  stop("at least one sample has no reverse reads", call. = FALSE)
}
if (reads_per_sample < max_reads_per_sample) {
  warning(
    "subsampling to ", reads_per_sample, " reads per sample instead of ",
    max_reads_per_sample,
    call. = FALSE
  )
}

# Random subset of each sample, tagged with its sample name, stacked into one
# table. The draw is unseeded, so the subset differs between runs.
tp2_plasmid <- do.call(rbind, unname(Map(
  function(reads, sample_id) {
    picked <- sample.int(nrow(reads), reads_per_sample)
    cbind(reads[picked, , drop = FALSE], "Sample" = sample_id)
  },
  reverse_reads, samples_ecoli
)))
tp2_plasmid$Sample <- as.factor(tp2_plasmid$Sample)
rm(table_files, reverse_reads, max_reads_per_sample, reads_per_sample)

# Output directory for all figures; silent when it already exists.
dir.create(
  "../report/images_new_plasmid",
  showWarnings = FALSE, recursive = TRUE
)
# GC-content: density of the GC column, one panel per sample, with a dotted
# reference line at 50.
ggplot(tp2_plasmid, aes(x = GC)) +
  geom_density(fill = "pink", alpha = 0.5) +
  geom_vline(xintercept = 50, linetype = "dotted") +
  facet_wrap(~Sample, nrow = 3) +
  labs(x = "ГЦ-состав, %", y = "Доля прочтений") +
  lims(x = c(25, 75), y = c(0, 0.15)) +
  theme(plot.title = element_text(hjust = 0.5))
# ggsave() without a plot argument writes the most recently built plot.
ggsave(
  "../report/images_new_plasmid/gc.png",
  width = 2250, height = 1500, units = "px"
)
# Mismatches per cycle of sequence
# Column 4 of the table holds, per read, a comma-separated list of numbers;
# they are plotted here as the cycles at which substitutions occurred.
# Splitting the whole column at once keeps one row per listed cycle and
# avoids growing the data frame sample by sample.
cycle_lists <- strsplit(as.character(tp2_plasmid[[4]]), ",", fixed = TRUE)
df2 <- data.frame(
  Cycles = as.numeric(unlist(cycle_lists)),
  # Repeat each read's sample once per cycle listed for that read.
  Sample = rep(tp2_plasmid$Sample, lengths(cycle_lists))
)
df2$Sample <- as.factor(df2$Sample)
rm(cycle_lists)

# Absolute number of substitutions at every cycle.
ggplot(df2, aes(x = Cycles)) +
  geom_density(stat = "count", fill = "blue", alpha = 0.5) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Цикл прочтения, №",
    y = "Количество замен",
    title = "Количество однонуклеотидных замен по циклам"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/mpc.png",
  width = 2250, height = 1500, units = "px"
)

# Same data as a density estimate, which is comparable between samples.
ggplot(df2, aes(x = Cycles)) +
  geom_density(stat = "density", fill = "blue", alpha = 0.5) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Цикл прочтения, №",
    y = "Доля от общего количества",
    title = "Количество однонуклеотидных замен по циклам"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/mpc_density.png",
  width = 2250, height = 1500, units = "px"
)
rm(df2)
# Mismatches depending on GC-content
# GC is rounded to whole percent so that reads fall into integer GC bins.
mgc2 <- data.frame(
  Count_SNPs = tp2_plasmid$Count_SNPs,
  Sample = tp2_plasmid$Sample,
  GC = round(tp2_plasmid$GC)
)

# Total number of substitutions in each GC bin.
ggplot(mgc2, aes(x = GC, y = Count_SNPs)) +
  stat_summary(fun = sum, geom = "line", linewidth = 0.5) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "ГЦ-состав, %",
    y = "Количество однонуклеотидных замен",
    title = "Количество однонуклеотидных замен в зависимости от ГЦ-состава прочтений"
  ) +
  geom_vline(xintercept = 50, linetype = "dotted") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/mgc.png",
  width = 2250, height = 1500, units = "px"
)

# Mean number of substitutions per read in each GC bin.
# Line width is set with `linewidth`, as in the plot above; `size` is
# deprecated for line geoms.
ggplot(mgc2, aes(x = GC, y = Count_SNPs)) +
  stat_summary(fun = mean, geom = "line", linewidth = 0.5) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "ГЦ-состав, %",
    y = "Среднее число однонуклеотидных замен на одно прочтение",
    title = "Количество однонуклеотидных замен в зависимости от ГЦ-состава прочтения"
  ) +
  geom_vline(xintercept = 50, linetype = "dotted") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/mgc_mean.png",
  width = 2250, height = 1500, units = "px"
)
rm(mgc2)
# Frequency of different types of mismatches
# Columns 12:23 hold one per-read count for each of the twelve substitution
# types; their column names are used as the type labels.
snp_type_counts <- tp2_plasmid[, 12:23, drop = FALSE]

# Apply `column_stat` (colSums / colMeans) to the substitution-type columns
# of every sample and stack the results into a long table with the columns
# <value_name>, type, Sample.
summarise_snp_types <- function(column_stat, value_name) {
  per_sample <- lapply(samples_ecoli, function(sample_id) {
    in_sample <- which(tp2_plasmid$Sample == sample_id)
    summary_rows <- data.frame(
      value = column_stat(snp_type_counts[in_sample, , drop = FALSE]),
      type = as.factor(colnames(snp_type_counts)),
      Sample = sample_id,
      row.names = NULL
    )
    colnames(summary_rows)[1] <- value_name
    summary_rows
  })
  do.call(rbind, per_sample)
}

# Total count of every substitution type per sample.
mtype2 <- summarise_snp_types(colSums, "Sum")
ggplot(mtype2) +
  geom_line(aes(x = type, y = Sum, group = Sample)) +
  geom_point(aes(x = type, y = Sum)) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Тип однонуклеотидной замены",
    y = "Количество однонуклеотидных замен",
    title = "Частота типов однонуклеотидных замен"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/mt.png",
  width = 2250, height = 1500, units = "px"
)

# Mean count of every substitution type per read.
mtype2_2 <- summarise_snp_types(colMeans, "mean")
ggplot(mtype2_2) +
  geom_line(aes(x = type, y = mean, group = Sample)) +
  geom_point(aes(x = type, y = mean)) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Тип однонуклеотидной замены",
    y = "Среднее по прочтению",
    title = "Частота типов однонуклеотидных замен"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/mt_mean.png",
  width = 2250, height = 1500, units = "px"
)
rm(mtype2, mtype2_2, snp_type_counts, summarise_snp_types)
# Nucleotide composition of deletions per sample
# Column 10 is split into single characters and only the letters are kept,
# so every row of delets2 is one letter of that column for some read.
# Working on the whole column at once also copes with samples that have no
# deletions at all.
deletion_chars <- strsplit(as.character(tp2_plasmid[[10]]), "")
delets2 <- data.frame(
  Nucleotide = as.character(unlist(deletion_chars)),
  # Repeat each read's sample once per character of that read's field.
  Sample = rep(tp2_plasmid$Sample, lengths(deletion_chars))
)
delets2 <- delets2[delets2$Nucleotide %in% c(letters, LETTERS), , drop = FALSE]
delets2$Nucleotide <- as.factor(delets2$Nucleotide)
rm(deletion_chars)

ggplot(delets2) +
  geom_bar(
    aes(x = Nucleotide),
    width = 0.4, fill = "blue", alpha = 0.5, col = "black"
  ) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Нуклеотид",
    y = "Количество делеций",
    title = "Состав делеций"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/delets.png",
  width = 2250, height = 1500, units = "px"
)
rm(delets2)
# Nucleotide composition of insertions per sample
# Column 11 is split into single characters and only the letters are kept,
# so every row of inserts2 is one letter of that column for some read.
# Working on the whole column at once also copes with samples that have no
# insertions at all.
insertion_chars <- strsplit(as.character(tp2_plasmid[[11]]), "")
inserts2 <- data.frame(
  Nucleotide = as.character(unlist(insertion_chars)),
  # Repeat each read's sample once per character of that read's field.
  Sample = rep(tp2_plasmid$Sample, lengths(insertion_chars))
)
inserts2 <- inserts2[
  inserts2$Nucleotide %in% c(letters, LETTERS), ,
  drop = FALSE
]
inserts2$Nucleotide <- as.factor(inserts2$Nucleotide)
rm(insertion_chars)

ggplot(inserts2) +
  labs(
    x = "Нуклеотид",
    y = "Количество вставок",
    title = "Состав вставок"
  ) +
  geom_bar(
    aes(x = Nucleotide),
    width = 0.4, fill = "blue", alpha = 0.5, col = "black"
  ) +
  facet_wrap(~Sample, nrow = 3) +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/inserts.png",
  width = 2250, height = 1500, units = "px"
)
rm(inserts2)
# Mean and maximum insertion length per sample
# Column 7 is the per-read mean insertion length. Reads are selected by that
# same column being non-zero; the previous filter tested the deletion-length
# column instead, so the statistics were taken over the wrong set of reads.
insert_lengths <- tp2_plasmid[[7]]
insert_mean2 <- do.call(rbind, lapply(samples_ecoli, function(sample_id) {
  # which() drops rows where the comparison is NA, as subset() would.
  with_insert <- which(tp2_plasmid$Sample == sample_id & insert_lengths != 0)
  sample_lengths <- insert_lengths[with_insert]
  has_values <- length(sample_lengths) > 0
  data.frame(
    Sample = sample_id,
    Mean = if (has_values) mean(sample_lengths) else NA_real_,
    Max. = if (has_values) max(sample_lengths) else NA_real_
  )
}))
rm(insert_lengths)

# Point = mean; the range is drawn from 0 up to the maximum, with open
# circles marking both ends.
ggplot(insert_mean2, aes(x = factor(Sample), y = Mean)) +
  geom_pointrange(aes(ymin = 0, ymax = Max.)) +
  geom_point(aes(y = 0), shape = 1) +
  geom_point(aes(y = Max.), shape = 1) +
  labs(
    x = "Образец",
    y = "Количество нуклеотидов",
    title = "Статистика длины вставки"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/iml.png",
  width = 2250, height = 1500, units = "px"
)
rm(insert_mean2)
# Mean and maximum deletion length per sample
# Only reads with a non-zero Mean_length_del are used; the values are taken
# from column 6. The statistics are computed as numbers directly instead of
# passing through a character matrix.
delete_lengths <- tp2_plasmid[[6]]
delete_mean2 <- do.call(rbind, lapply(samples_ecoli, function(sample_id) {
  # which() drops rows where the comparison is NA, as subset() would.
  with_deletion <- which(
    tp2_plasmid$Sample == sample_id & tp2_plasmid$Mean_length_del != 0
  )
  sample_lengths <- delete_lengths[with_deletion]
  has_values <- length(sample_lengths) > 0
  data.frame(
    Sample = sample_id,
    Mean = if (has_values) mean(sample_lengths) else NA_real_,
    Max. = if (has_values) max(sample_lengths) else NA_real_
  )
}))
rm(delete_lengths)

# Point = mean; the range is drawn from 0 up to the maximum, with open
# circles marking both ends.
ggplot(delete_mean2, aes(x = factor(Sample), y = Mean)) +
  geom_pointrange(aes(ymin = 0, ymax = Max.)) +
  geom_point(aes(y = 0), shape = 1) +
  geom_point(aes(y = Max.), shape = 1) +
  labs(
    x = "Образец",
    y = "Количество нуклеотидов",
    title = "Статистика длины делеции"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/dml.png",
  width = 2250, height = 1500, units = "px"
)
rm(delete_mean2)
# Number of mismatches per sample
# NOTE(review): the total-count plot is written to mps_mean.png and the
# per-read mean plot to mps.png, the reverse of the <name>.png /
# <name>_mean.png convention used for the mgc and mt figures in this script.
# Confirm which file the report expects before renaming.

# Total number of substitutions per sample.
ggplot(tp2_plasmid, aes(x = Sample, y = Count_SNPs, group = 1)) +
  stat_summary(
    fun = sum, geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  labs(
    x = "Образец",
    y = "Количество замен",
    title = "Количество однонуклеотидных замен"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/mps_mean.png",
  width = 2250, height = 1500, units = "px"
)

# Mean number of substitutions per read in each sample.
ggplot(tp2_plasmid, aes(x = Sample, y = Count_SNPs, group = 1)) +
  stat_summary(
    fun = mean, geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  labs(
    x = "Образец",
    y = "Среднее число замен",
    title = "Количество однонуклеотидных замен на одно прочтение"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/mps.png",
  width = 2250, height = 1500, units = "px"
)
# Number of deletions per cycle of sequence

# TRUE for every element of x that contains no non-digit character. Note
# that this is also TRUE for "" and NA; later sections of the script use the
# negation of this function, so its behaviour is kept exactly as is.
numbers_only <- function(x) !grepl("\\D", x)

# Column 10 is split on "|" and then on ","; the digit-only tokens are
# plotted as the cycles at which deletions occurred. The sample of every
# token is tracked through both splits.
del_events <- strsplit(as.character(tp2_plasmid[[10]]), "|", fixed = TRUE)
del_event_sample <- rep(tp2_plasmid$Sample, lengths(del_events))
del_event_fields <- strsplit(
  as.character(unlist(del_events)), ",",
  fixed = TRUE
)
del_token_sample <- rep(del_event_sample, lengths(del_event_fields))
del_tokens <- as.character(unlist(del_event_fields))
del_is_position <- numbers_only(del_tokens)
dels2 <- data.frame(
  Num = as.numeric(del_tokens[del_is_position]),
  Sample = del_token_sample[del_is_position]
)
# Empty and missing tokens pass numbers_only() but are not positions.
dels2 <- dels2[!is.na(dels2$Num), , drop = FALSE]
rm(
  del_events, del_event_sample, del_event_fields,
  del_token_sample, del_tokens, del_is_position
)

ggplot(dels2, aes(x = Num)) +
  stat_count(geom = "line", position = "identity") +
  stat_count(geom = "point", position = "identity", size = 1) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Цикл прочтения, №",
    y = "Количество делеций",
    title = "Число делеций на цикл"
  ) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/dpc.png",
  width = 2250, height = 1500, units = "px"
)
rm(dels2)
# Number of insertions per cycle of sequence
# Column 11 is split on "|" and then on ","; the digit-only tokens (as
# judged by numbers_only(), defined earlier in this script) are plotted as
# the cycles at which insertions occurred. The sample of every token is
# tracked through both splits.
ins_events <- strsplit(as.character(tp2_plasmid[[11]]), "|", fixed = TRUE)
ins_event_sample <- rep(tp2_plasmid$Sample, lengths(ins_events))
ins_event_fields <- strsplit(
  as.character(unlist(ins_events)), ",",
  fixed = TRUE
)
ins_token_sample <- rep(ins_event_sample, lengths(ins_event_fields))
ins_tokens <- as.character(unlist(ins_event_fields))
ins_is_position <- numbers_only(ins_tokens)
ins2 <- data.frame(
  Num = as.numeric(ins_tokens[ins_is_position]),
  Sample = ins_token_sample[ins_is_position]
)
# Empty and missing tokens pass numbers_only() but are not positions.
ins2 <- ins2[!is.na(ins2$Num), , drop = FALSE]
rm(
  ins_events, ins_event_sample, ins_event_fields,
  ins_token_sample, ins_tokens, ins_is_position
)

ggplot(ins2, aes(x = Num)) +
  stat_count(geom = "line", position = "identity") +
  stat_count(geom = "point", position = "identity", size = 1) +
  facet_wrap(~Sample, nrow = 3) +
  labs(
    x = "Цикл прочтения, №",
    y = "Количество вставок",
    title = "Число вставок на цикл"
  ) +
  scale_x_continuous(breaks = seq(0, 150, 25)) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
ggsave(
  "../report/images_new_plasmid/ipc.png",
  width = 2250, height = 1500, units = "px"
)
rm(ins2)
# Number of deletions per sample
# Column 10 is split on "|"; every resulting piece is counted as one
# deletion event of the read's sample.
deletion_events <- strsplit(
  as.character(tp2_plasmid[[10]]), "|",
  fixed = TRUE
)
parse_delets2 <- data.frame(
  Num = as.character(unlist(deletion_events)),
  Sample = rep(tp2_plasmid$Sample, lengths(deletion_events))
)
# A missing field is not an event and must not add to the count.
parse_delets2 <- parse_delets2[!is.na(parse_delets2$Num), , drop = FALSE]
rm(deletion_events)

ggplot(parse_delets2, aes(x = Sample, group = 1)) +
  stat_count(
    geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  labs(
    x = "Образец",
    y = "Количество делеций",
    title = "Число делеций на образец"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/dps.png",
  width = 2250, height = 1500, units = "px"
)
rm(parse_delets2)
# Number of insertions per sample
# Column 11 is split on "|"; every resulting piece is counted as one
# insertion event of the read's sample.
insertion_events <- strsplit(
  as.character(tp2_plasmid[[11]]), "|",
  fixed = TRUE
)
parse_inserts2 <- data.frame(
  Num = as.character(unlist(insertion_events)),
  Sample = rep(tp2_plasmid$Sample, lengths(insertion_events))
)
# A missing field is not an event and must not add to the count.
parse_inserts2 <- parse_inserts2[!is.na(parse_inserts2$Num), , drop = FALSE]
rm(insertion_events)

ggplot(parse_inserts2, aes(x = Sample, group = 1)) +
  stat_count(
    geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  labs(
    x = "Образец",
    y = "Количество вставок",
    title = "Число вставок на образец"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/ips.png",
  width = 2250, height = 1500, units = "px"
)
rm(parse_inserts2)
# Frequency of insertion motifs per sample
# Column 11 is split on "|" and then on ","; the tokens that are NOT
# digit-only (as judged by numbers_only(), defined earlier in this script)
# are counted as insertion motifs. The sample of every token is tracked
# through both splits.
ins_motif_events <- strsplit(
  as.character(tp2_plasmid[[11]]), "|",
  fixed = TRUE
)
ins_motif_event_sample <- rep(tp2_plasmid$Sample, lengths(ins_motif_events))
ins_motif_fields <- strsplit(
  as.character(unlist(ins_motif_events)), ",",
  fixed = TRUE
)
ins_motif_token_sample <- rep(
  ins_motif_event_sample, lengths(ins_motif_fields)
)
ins_motif_tokens <- as.character(unlist(ins_motif_fields))
ins_is_motif <- !numbers_only(ins_motif_tokens)
insert_motifs2 <- data.frame(
  Num = ins_motif_tokens[ins_is_motif],
  Sample = ins_motif_token_sample[ins_is_motif]
)
rm(
  ins_motif_events, ins_motif_event_sample, ins_motif_fields,
  ins_motif_token_sample, ins_motif_tokens, ins_is_motif
)

ggplot(insert_motifs2, aes(x = Num)) +
  stat_count(
    geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  facet_wrap(~Sample, nrow = 4, scales = "free_y") +
  labs(
    x = "Мотив",
    y = "Количество",
    title = "Представленность мотивов вставок в образцах"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/imps.png",
  width = 7550, height = 2500, units = "px"
)
rm(insert_motifs2)
# Frequency of deletion motifs per sample
# Column 10 is split on "|" and then on ","; the tokens that are NOT
# digit-only (as judged by numbers_only(), defined earlier in this script)
# are counted as deletion motifs. The sample of every token is tracked
# through both splits.
del_motif_events <- strsplit(
  as.character(tp2_plasmid[[10]]), "|",
  fixed = TRUE
)
del_motif_event_sample <- rep(tp2_plasmid$Sample, lengths(del_motif_events))
del_motif_fields <- strsplit(
  as.character(unlist(del_motif_events)), ",",
  fixed = TRUE
)
del_motif_token_sample <- rep(
  del_motif_event_sample, lengths(del_motif_fields)
)
del_motif_tokens <- as.character(unlist(del_motif_fields))
del_is_motif <- !numbers_only(del_motif_tokens)
delete_motifs2 <- data.frame(
  Num = del_motif_tokens[del_is_motif],
  Sample = del_motif_token_sample[del_is_motif]
)
rm(
  del_motif_events, del_motif_event_sample, del_motif_fields,
  del_motif_token_sample, del_motif_tokens, del_is_motif
)

ggplot(delete_motifs2, aes(x = Num)) +
  stat_count(
    geom = "bar",
    width = 0.7, fill = "white", color = "black", alpha = 0.7
  ) +
  facet_wrap(~Sample, nrow = 4, scales = "free_y") +
  labs(
    x = "Мотив",
    y = "Количество",
    title = "Представленность мотивов делеций в образцах"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
ggsave(
  "../report/images_new_plasmid/dmps.png",
  width = 7550, height = 2500, units = "px"
)
rm(delete_motifs2)