R-scripts fixed
This commit is contained in:
parent
fe7bdc3ad6
commit
c4f360144d
389
bacterial_genome_zvl.R
Normal file
389
bacterial_genome_zvl.R
Normal file
@ -0,0 +1,389 @@
|
|||||||
|
library(ggplot2)
|
||||||
|
library(stringr)
|
||||||
|
#####
|
||||||
|
#This part of scripts produces the plots of alignment data in a directory "../report/images_assembly_zvl/" with format ".png". Only genome-aligned reverse reads were taken here.
|
||||||
|
#For every sample was created a set of reads of equal capacity (min between all sample reads, estimated manually).
|
||||||
|
#####
|
||||||
|
|
||||||
|
#Uploading data in R
|
||||||
|
table_set <- data.frame(matrix(ncol = 24, nrow = 0))
|
||||||
|
colnames(table_set) <- unlist(strsplit("ID;Read;Count_SNPs;Cycles;GC;Mean_length_del;Mean_length_insert;Read_length;Align_length;Dels;Inserts;A>C;A>G;A>T;C>A;C>G;C>T;G>A;G>C;G>T;T>A;T>C;T>G",";"))
|
||||||
|
|
||||||
|
temp_name<-read.table("names_of_custom_chromosome.txt")
|
||||||
|
samples <- c()
|
||||||
|
for (i in temp_name[,c("V1")]) {
|
||||||
|
samples<-c(samples, gsub("_genome_mapq20_table.txt", "", gsub("custom_", "", i)))
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i in samples) {
|
||||||
|
filename<-paste("custom_", i, "_genome_mapq20_table.txt", sep='')
|
||||||
|
temp<-read.csv(filename, sep=";", header=TRUE)
|
||||||
|
temp<-subset(temp, Read==2)
|
||||||
|
temp<-temp[sample(nrow(temp), 200000),]
|
||||||
|
temp<-cbind(temp, "Sample"=i)
|
||||||
|
table_set<-rbind(table_set, temp)
|
||||||
|
}
|
||||||
|
rm(temp)
|
||||||
|
table_set$Sample<-as.factor(table_set$Sample)
|
||||||
|
dir.create("../report/images_assembly_zvl")
|
||||||
|
|
||||||
|
#GC-content
|
||||||
|
|
||||||
|
ggplot(data = table_set, aes(x = GC)) +
|
||||||
|
geom_density(stat = "density", fill = "pink", alpha = 0.5) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/gc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
#Mismatches per cycle of sequence
|
||||||
|
|
||||||
|
df2<-data.frame("Cycles"=0, "Sample"=1)
|
||||||
|
for (i in samples) {
|
||||||
|
cyc <-subset(table_set, Sample == i)[,4]
|
||||||
|
cyc <- as.numeric(unlist(strsplit(cyc,",")))
|
||||||
|
df2<-rbind(df2, cbind("Cycles" = cyc, "Sample" =i))
|
||||||
|
}
|
||||||
|
df2<-df2[-1,]
|
||||||
|
df2$Sample<-as.factor(df2$Sample)
|
||||||
|
df2$Cycles<-as.numeric(df2$Cycles)
|
||||||
|
rm (cyc, i)
|
||||||
|
|
||||||
|
ggplot(df2, aes(x = Cycles)) +
|
||||||
|
geom_density(stat = "count", fill = "blue", alpha = 0.5) +
|
||||||
|
#geom_line(stat = "count", fill = "blue", alpha = 0.5) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
ggtitle("Mismatches per cycle") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mpc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
ggplot(df2, aes(x = Cycles)) +
|
||||||
|
geom_density(stat = "density", fill = "blue", alpha = 0.5) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
ggtitle("Mismatches per cycle") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mpc_density.png", width = 2250, height = 1500, units = "px")
|
||||||
|
rm (df2)
|
||||||
|
|
||||||
|
#Mismatches depending on GC-content
|
||||||
|
|
||||||
|
mgc2<-cbind(table_set[, c("Count_SNPs", "Sample")], "GC" = round(table_set$GC))
|
||||||
|
|
||||||
|
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
||||||
|
stat_summary(fun = sum, geom="line", linewidth = 0.5) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
ggtitle("Mismatches/GC") +
|
||||||
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme_bw()+
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mgc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
||||||
|
stat_summary(fun = mean, geom="line", size = 0.5) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
ggtitle("Mismatches/GC") +
|
||||||
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme_bw()+
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mgc_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (mgc2)
|
||||||
|
|
||||||
|
#Frequency of different types of mismatches
|
||||||
|
|
||||||
|
mtype2 <- data.frame(matrix(ncol = 3, nrow = 0))
|
||||||
|
colnames(mtype2)<- c("Sum", "type", "Sample")
|
||||||
|
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- apply(table_set[, c(12:23)][table_set$Sample==i,], 2, sum)
|
||||||
|
temp <- data.frame("Sum" = temp)
|
||||||
|
temp$type<-as.factor(colnames(table_set[, c(12:23)]))
|
||||||
|
temp<-cbind(temp, "Sample" = i)
|
||||||
|
mtype2<-rbind(mtype2, temp)
|
||||||
|
}
|
||||||
|
rm(temp)
|
||||||
|
|
||||||
|
ggplot(mtype2) +
|
||||||
|
geom_line(aes(x = type, y = Sum, group = Sample)) +
|
||||||
|
geom_point(aes(x = type, y = Sum)) +
|
||||||
|
#facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
||||||
|
facet_wrap(~Sample, nrow = 3, ) +
|
||||||
|
ggtitle("Mismatch type") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
|
ggsave("../report/images_assembly_zvl/mt.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
mtype2_2 <- data.frame(matrix(ncol = 3, nrow = 0))
|
||||||
|
colnames(mtype2_2)<- c("mean", "type", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- apply(table_set[table_set$Sample==i, c(12:23)], 2, mean)
|
||||||
|
temp <- data.frame("mean" = temp)
|
||||||
|
temp$type<-as.factor(colnames(table_set[, c(12:23)]))
|
||||||
|
temp<-cbind(temp, "Sample" = i)
|
||||||
|
mtype2_2<-rbind(mtype2_2, temp)
|
||||||
|
}
|
||||||
|
mtype2_2$Sample<-as.factor(mtype2_2$Sample)
|
||||||
|
|
||||||
|
ggplot(mtype2_2) +
|
||||||
|
geom_line(aes(x = type, y = mean, group = Sample)) +
|
||||||
|
geom_point(aes(x = type, y = mean)) +
|
||||||
|
#facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
ggtitle("Mismatch type") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
|
ggsave("../report/images_assembly_zvl/mt_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (mtype2, mtype2_2)
|
||||||
|
|
||||||
|
#Number of deletions per sample
|
||||||
|
|
||||||
|
delets2 <- data.frame(matrix(ncol = 2, nrow = 0))
|
||||||
|
colnames(delets2)<- c("Nucleotide", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,10]
|
||||||
|
temp <- unlist(strsplit(temp,""))
|
||||||
|
temp<- temp[temp %in% c(letters, LETTERS)]
|
||||||
|
temp<-cbind("Nucleotide"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
temp$Nucleotide <- as.factor(temp$Nucleotide)
|
||||||
|
delets2<-rbind(temp, delets2)
|
||||||
|
}
|
||||||
|
delets2$Sample<-as.factor(delets2$Sample)
|
||||||
|
rm (temp)
|
||||||
|
|
||||||
|
ggplot(delets2) +
|
||||||
|
geom_bar(aes(x = Nucleotide), width = 0.4, fill = "blue", alpha = 0.5, col = "black") +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
#ylim(0, 1500) +
|
||||||
|
ggtitle ("Deletions") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/delets.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (delets2)
|
||||||
|
|
||||||
|
#Number of insertions per sample
|
||||||
|
|
||||||
|
inserts2 <- data.frame(matrix(ncol = 2, nrow = 0))
|
||||||
|
colnames(inserts2)<- c("Nucleotide", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,11]
|
||||||
|
temp <- unlist(strsplit(temp,""))
|
||||||
|
temp<- temp[temp %in% c(letters, LETTERS)]
|
||||||
|
temp<-cbind("Nucleotide"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
temp$Nucleotide <- as.factor(temp$Nucleotide)
|
||||||
|
inserts2<-rbind(temp, inserts2)
|
||||||
|
}
|
||||||
|
inserts2$Sample<-as.factor(inserts2$Sample)
|
||||||
|
|
||||||
|
ggplot(inserts2) +
|
||||||
|
ggtitle("Insertions") +
|
||||||
|
geom_bar(aes(x = Nucleotide), width = 0.4, fill = "blue", alpha = 0.5, col = "black") +
|
||||||
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
|
#ylim(0, 2000) +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/inserts.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (inserts2)
|
||||||
|
|
||||||
|
#Mean, min and max values of insertions per sample
|
||||||
|
|
||||||
|
insert_mean2 <- data.frame(matrix(ncol = 7, nrow = 0))
|
||||||
|
colnames(insert_mean2)<- c("Sample", "Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
|
||||||
|
for (i in samples) {
|
||||||
|
temp<-subset(table_set, Mean_length_insert!=0 & Sample == i)[,7]
|
||||||
|
temp<-cbind("Sample"=i, t(summary(temp)))
|
||||||
|
insert_mean2<-rbind(insert_mean2, temp)
|
||||||
|
}
|
||||||
|
insert_mean2[, 2]<-as.numeric(insert_mean2[, 2])
|
||||||
|
insert_mean2[, 4]<-as.numeric(insert_mean2[, 4])
|
||||||
|
insert_mean2[, 5]<-as.numeric(insert_mean2[, 5])
|
||||||
|
insert_mean2[, 7]<-as.numeric(insert_mean2[, 7])
|
||||||
|
ggplot(data.frame(insert_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
||||||
|
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
||||||
|
#scale_y_continuous(breaks = c(0:12), limits = c(0, 13)) +
|
||||||
|
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
||||||
|
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
||||||
|
ggtitle("Insertion mean length") +
|
||||||
|
scale_x_discrete(name = "Sample") +
|
||||||
|
#scale_y_continuous(name = "Length", breaks = seq(0, 18, 1)) +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/iml.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (insert_mean2)
|
||||||
|
|
||||||
|
#Mean, min and max values of deletions per sample
|
||||||
|
|
||||||
|
delete_mean2 <- data.frame(matrix(ncol = 7, nrow = 0))
|
||||||
|
colnames(delete_mean2)<- c("Sample", "Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
|
||||||
|
for (i in samples) {
|
||||||
|
temp<-subset(table_set, Mean_length_del!=0 & Sample == i)[,6]
|
||||||
|
temp<-cbind("Sample"=i, t(summary(temp)))
|
||||||
|
delete_mean2<-rbind(delete_mean2, temp)
|
||||||
|
}
|
||||||
|
delete_mean2[, 2]<-as.numeric(delete_mean2[, 2])
|
||||||
|
delete_mean2[, 4]<-as.numeric(delete_mean2[, 4])
|
||||||
|
delete_mean2[, 5]<-as.numeric(delete_mean2[, 5])
|
||||||
|
delete_mean2[, 7]<-as.numeric(delete_mean2[, 7])
|
||||||
|
ggplot(data.frame(delete_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
||||||
|
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
||||||
|
#scale_y_continuous(breaks = c(0:22), limits = c(0, 22)) +
|
||||||
|
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
||||||
|
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
||||||
|
ggtitle("Deletion mean length") +
|
||||||
|
scale_x_discrete(name = "Sample") +
|
||||||
|
#scale_y_continuous(name = "Length", breaks = seq(0, 12, 1)) +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/dml.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (delete_mean2)
|
||||||
|
|
||||||
|
#Number of mismatches per sample
|
||||||
|
|
||||||
|
ggplot(table_set, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
||||||
|
stat_summary(fun = sum, geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
|
ggtitle("Mismatches per sample") +
|
||||||
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mps_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(table_set, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
||||||
|
stat_summary(fun = mean, geom="point") +
|
||||||
|
stat_summary(fun = mean, geom="line") +
|
||||||
|
ggtitle("Mismatches per sample") +
|
||||||
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/mps.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
#Number of deletions per cycle of sequence
|
||||||
|
|
||||||
|
numbers_only <- function(x) !grepl("\\D", x)
|
||||||
|
|
||||||
|
dels2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(dels2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,10]
|
||||||
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
|
temp <- unlist(strsplit(temp,","))
|
||||||
|
temp<- temp[numbers_only(temp)]
|
||||||
|
temp<-cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
dels2<-rbind(temp, dels2)
|
||||||
|
}
|
||||||
|
dels2$Num<-as.numeric(dels2$Num)
|
||||||
|
ggplot(dels2, aes(x = Num)) +
|
||||||
|
#geom_density(stat = "count", alpha = 0.5) +
|
||||||
|
stat_count(geom="line", position="identity") +
|
||||||
|
stat_count(geom="point", position="identity", size = 1) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
#ylim(0, 1500) +
|
||||||
|
ggtitle ("Deletions per cycle") +
|
||||||
|
theme_bw()+
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/dpc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
rm (dels2, temp)
|
||||||
|
|
||||||
|
#Number of insertions per cycle of sequence
|
||||||
|
|
||||||
|
ins2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(ins2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,11]
|
||||||
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
|
temp <- unlist(strsplit(temp,","))
|
||||||
|
temp<- temp[numbers_only(temp)]
|
||||||
|
temp<-cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
ins2<-rbind(temp, ins2)
|
||||||
|
}
|
||||||
|
ins2$Num<-as.numeric(ins2$Num)
|
||||||
|
ggplot(ins2, aes(x = Num)) +
|
||||||
|
stat_count(geom="line", position="identity") +
|
||||||
|
stat_count(geom="point", position="identity", size = 1) +
|
||||||
|
facet_wrap(~Sample, nrow = 3) +
|
||||||
|
#ylim(0, 1500) +
|
||||||
|
ggtitle ("Insertions per cycle") +
|
||||||
|
scale_x_continuous(breaks=seq(0, 150, 25))+
|
||||||
|
theme_bw()+
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/ipc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
rm (ins2, temp)
|
||||||
|
|
||||||
|
#Number of deletions per sample
|
||||||
|
|
||||||
|
parse_delets2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(parse_delets2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,10]
|
||||||
|
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
||||||
|
temp<-cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
parse_delets2<-rbind(temp, parse_delets2)
|
||||||
|
}
|
||||||
|
ggplot(parse_delets2, aes(x=Sample, group = 1)) +
|
||||||
|
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
|
ggtitle("Deletions per sample") +
|
||||||
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/dps.png", width = 2250, height = 1500, units = "px")
|
||||||
|
rm (parse_delets2, temp)
|
||||||
|
|
||||||
|
#Number of insertions per sample
|
||||||
|
|
||||||
|
parse_inserts2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(parse_inserts2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,11]
|
||||||
|
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
||||||
|
temp<-cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp<-data.frame(temp)
|
||||||
|
parse_inserts2<-rbind(temp, parse_inserts2)
|
||||||
|
}
|
||||||
|
ggplot(parse_inserts2, aes(x=Sample, group = 1)) +
|
||||||
|
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
|
ggtitle("Insertions per sample") +
|
||||||
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
|
ggsave("../report/images_assembly_zvl/ips.png", width = 2250, height = 1500, units = "px")
|
||||||
|
rm (parse_inserts2, temp)
|
||||||
|
|
||||||
|
#Frequency of insertion motifs per sample
|
||||||
|
|
||||||
|
insert_motifs2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(insert_motifs2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,11]
|
||||||
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
|
temp <- unlist(strsplit(temp,","))
|
||||||
|
temp <- temp[!numbers_only(temp)]
|
||||||
|
temp <- cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp <- data.frame(temp)
|
||||||
|
insert_motifs2 <- rbind(temp, insert_motifs2)
|
||||||
|
}
|
||||||
|
ggplot(insert_motifs2, aes(x = Num)) +
|
||||||
|
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
|
facet_wrap(~Sample, nrow=4, scales = "free_y") +
|
||||||
|
ggtitle("Insertion motifs per sample") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
|
ggsave("../report/images_assembly_zvl/imps.png", width = 8250, height = 2500, units = "px")
|
||||||
|
rm (temp, insert_motifs2)
|
||||||
|
|
||||||
|
#Frequency of deletion motifs per sample
|
||||||
|
|
||||||
|
delete_motifs2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
|
colnames(delete_motifs2)<-c("Num", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
|
temp <- table_set[table_set$Sample==i,][,10]
|
||||||
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
|
temp <- unlist(strsplit(temp,","))
|
||||||
|
temp <- temp[!numbers_only(temp)]
|
||||||
|
temp <- cbind("Num"=temp, "Sample" = i)
|
||||||
|
temp <- data.frame(temp)
|
||||||
|
delete_motifs2 <- rbind(temp, delete_motifs2)
|
||||||
|
}
|
||||||
|
ggplot(delete_motifs2, aes(x = Num)) +
|
||||||
|
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
|
facet_wrap(~Sample, nrow=7, scales = "free_y") +
|
||||||
|
ggtitle("Deletions motifs per sample") +
|
||||||
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
|
ggsave("../report/images_assembly_zvl/dmps.png", width = 8250, height = 3500, units = "px")
|
||||||
|
rm (delete_motifs2, temp)
|
239
plasmid.R
239
plasmid.R
@ -7,67 +7,60 @@ library(stringr)
|
|||||||
|
|
||||||
#Uploading data in R
|
#Uploading data in R
|
||||||
|
|
||||||
t1<-read.csv("../custom_1_90_mapq.txt", sep=";")
|
temp_name<-read.table("names_of_custom_plasmid.txt")
|
||||||
t2<-read.csv("../custom_2_87_mapq.txt", sep=";")
|
samples <- c()
|
||||||
t4<-read.csv("../custom_4_96_mapq.txt", sep=";")
|
for (i in temp_name[,c("V1")]) {
|
||||||
t5<-read.csv("../custom_5_92_mapq.txt", sep=";")
|
samples<-c(samples, gsub("_plasmid_mapq20_table.txt", "", gsub("custom_", "", i)))
|
||||||
t6<-read.csv("../custom_6_108_mapq.txt", sep=";")
|
|
||||||
t7<-read.csv("../custom_7_97_mapq.txt", sep=";")
|
|
||||||
tpol<-read.csv("../custom_pol_93_mapq.txt", sep=";")
|
|
||||||
|
|
||||||
tp<-cbind(t1, "Sample" =1)
|
|
||||||
tp<-rbind(tp, cbind(t2, "Sample" =2))
|
|
||||||
tp<-rbind(tp, cbind(t4, "Sample" =4))
|
|
||||||
tp<-rbind(tp, cbind(t5, "Sample" =5))
|
|
||||||
tp<-rbind(tp, cbind(t6, "Sample" =6))
|
|
||||||
tp<-rbind(tp, cbind(t7, "Sample" =7))
|
|
||||||
tp<-rbind(tp, cbind(tpol, "Sample" =9))
|
|
||||||
|
|
||||||
samples<-c(2, 4:7, 9)
|
|
||||||
|
|
||||||
sample2<-subset(tp, Read==2&Sample==1)
|
|
||||||
tp2<-sample2[sample(nrow(sample2), 20000),]
|
|
||||||
for (i in samples) {
|
|
||||||
sample2<-subset(tp, Read==2&Sample==i)
|
|
||||||
tp2<-rbind(tp2, sample2[sample(nrow(sample2), 20000),])
|
|
||||||
}
|
}
|
||||||
rm(sample2, t1, t2, t4, t5, t6, t7, tpol)
|
tp2 <- data.frame(matrix(ncol = 24, nrow = 0))
|
||||||
|
colnames(tp2) <- unlist(strsplit("ID;Read;Count_SNPs;Cycles;GC;Mean_length_del;Mean_length_insert;Read_length;Align_length;Dels;Inserts;A>C;A>G;A>T;C>A;C>G;C>T;G>A;G>C;G>T;T>A;T>C;T>G",";"))
|
||||||
|
for (i in temp_name[,c("V1")]) {
|
||||||
|
temp<-read.csv(i, sep=";", header=TRUE)
|
||||||
|
temp<-subset(temp, Read==2)
|
||||||
|
temp<-temp[sample(nrow(temp), 200000),]
|
||||||
|
temp<-cbind(temp, "Sample"=gsub("_plasmid_mapq20_table.txt", "", gsub("custom_", "", i)))
|
||||||
|
tp2<-rbind(tp2, temp)
|
||||||
|
}
|
||||||
|
rm(temp, temp_name)
|
||||||
|
tp2$Sample<-as.factor(tp2$Sample)
|
||||||
|
dir.create("../report/images_new_plasmid")
|
||||||
|
|
||||||
#GC-content
|
#GC-content
|
||||||
|
|
||||||
ggplot(data = tp2, aes(x = GC)) +
|
ggplot(data = tp2, aes(x = GC)) +
|
||||||
geom_density(stat = "density", fill = "pink", alpha = 0.5) +
|
geom_density(stat = "density", fill = "pink", alpha = 0.5) +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
geom_vline(xintercept = 50, linetype="dotted") +
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
xlim(25, 75) +
|
xlim(25, 75) +
|
||||||
ylim(0, 0.15) +
|
ylim(0, 0.15) +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/gc.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/gc.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
#Mismatches per cycle of sequence
|
#Mismatches per cycle of sequence
|
||||||
|
|
||||||
df2<-data.frame("Cycles"=0, "Sample"=1)
|
df2<-data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
for (i in c(1:2, 4:7, 9)) {
|
colnames(df2)<-c("Cycles", "Sample")
|
||||||
|
for (i in samples) {
|
||||||
cyc <-subset(tp2, Sample == i)[,4]
|
cyc <-subset(tp2, Sample == i)[,4]
|
||||||
cyc <- as.numeric(unlist(strsplit(cyc,",")))
|
cyc <- as.numeric(unlist(strsplit(cyc,",")))
|
||||||
df2<-rbind(df2, cbind("Cycles" = cyc, "Sample" =i))
|
df2<-rbind(df2, cbind("Cycles" = cyc, "Sample" =i))
|
||||||
}
|
}
|
||||||
df2<-df2[-1,]
|
|
||||||
df2$Sample<-as.factor(df2$Sample)
|
df2$Sample<-as.factor(df2$Sample)
|
||||||
|
df2$Cycles<-as.numeric(df2$Cycles)
|
||||||
rm (cyc, i)
|
rm (cyc, i)
|
||||||
ggplot(df2, aes(x = Cycles)) +
|
ggplot(df2, aes(x = Cycles)) +
|
||||||
geom_density(stat = "count", fill = "blue", alpha = 0.5) +
|
geom_density(stat = "count", fill = "blue", alpha = 0.5) +
|
||||||
#geom_line(stat = "count", fill = "blue", alpha = 0.5) +
|
#geom_line(stat = "count", fill = "blue", alpha = 0.5) +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
ggtitle("Mismatches per cycle") +
|
ggtitle("Mismatches per cycle") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mpc.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mpc.png", width = 2250, height = 1500, units = "px")
|
||||||
ggplot(df2, aes(x = Cycles)) +
|
ggplot(df2, aes(x = Cycles)) +
|
||||||
geom_density(stat = "density", fill = "blue", alpha = 0.5) +
|
geom_density(stat = "density", fill = "blue", alpha = 0.5) +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
ggtitle("Mismatches per cycle") +
|
ggtitle("Mismatches per cycle") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mpc_density.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mpc_density.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (df2)
|
rm (df2)
|
||||||
|
|
||||||
#Mismatches depending on GC-content
|
#Mismatches depending on GC-content
|
||||||
@ -75,28 +68,26 @@ rm (df2)
|
|||||||
mgc2<-cbind(tp2[, c("Count_SNPs", "Sample")], "GC" = round(tp2$GC))
|
mgc2<-cbind(tp2[, c("Count_SNPs", "Sample")], "GC" = round(tp2$GC))
|
||||||
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
||||||
stat_summary(fun = sum, geom="line", linewidth = 0.5) +
|
stat_summary(fun = sum, geom="line", linewidth = 0.5) +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
ggtitle("Mismatches/GC") +
|
ggtitle("Mismatches/GC") +
|
||||||
geom_vline(xintercept = 50, linetype="dotted") +
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme_bw()+
|
theme_bw()+
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mgc.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mgc.png", width = 2250, height = 1500, units = "px")
|
||||||
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
ggplot(mgc2, aes(x=GC, y=Count_SNPs)) +
|
||||||
stat_summary(fun = mean, geom="line", size = 0.5) +
|
stat_summary(fun = mean, geom="line", size = 0.5) +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
ggtitle("Mismatches/GC") +
|
ggtitle("Mismatches/GC") +
|
||||||
geom_vline(xintercept = 50, linetype="dotted") +
|
geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme_bw()+
|
theme_bw()+
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mgc_mean.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mgc_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (mgc2)
|
rm (mgc2)
|
||||||
|
|
||||||
#Frequency of different types of mismatches
|
#Frequency of different types of mismatches
|
||||||
|
|
||||||
mtype2 <- apply(tp2[, c(12:23)][tp2$Sample==1,], 2, sum)
|
mtype2 <- data.frame(matrix(ncol = 3, nrow = 0))
|
||||||
mtype2 <- data.frame("Sum" = mtype2)
|
colnames(mtype2)<- c("Sum", "type", "Sample")
|
||||||
mtype2$type<-as.factor(colnames(tp2[, c(12:23)]))
|
|
||||||
mtype2<-cbind(mtype2, "Sample" = 1)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- apply(tp2[, c(12:23)][tp2$Sample==i,], 2, sum)
|
temp <- apply(tp2[, c(12:23)][tp2$Sample==i,], 2, sum)
|
||||||
temp <- data.frame("Sum" = temp)
|
temp <- data.frame("Sum" = temp)
|
||||||
@ -107,17 +98,14 @@ for (i in samples) {
|
|||||||
ggplot(mtype2) +
|
ggplot(mtype2) +
|
||||||
geom_line(aes(x = type, y = Sum, group = Sample)) +
|
geom_line(aes(x = type, y = Sum, group = Sample)) +
|
||||||
geom_point(aes(x = type, y = Sum)) +
|
geom_point(aes(x = type, y = Sum)) +
|
||||||
#facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
#facet_wrap(~Sample, nrow = 2, scales = "free_y") +
|
||||||
facet_wrap(~Sample, nrow = 3, ) +
|
facet_wrap(~Sample, nrow = 2, ) +
|
||||||
ggtitle("Mismatch type") +
|
ggtitle("Mismatch type") +
|
||||||
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
ggsave("../report/images_read2/mt.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mt.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
mtype2_2 <- data.frame(matrix(ncol = 3, nrow = 0))
|
||||||
mtype2_2 <- apply(tp2[tp2$Sample==1, c(12:23)], 2, mean)
|
colnames(mtype2_2)<- c("mean", "type", "Sample")
|
||||||
mtype2_2 <- data.frame("mean" = mtype2_2)
|
|
||||||
mtype2_2$type<-as.factor(colnames(tp2[, c(12:23)]))
|
|
||||||
mtype2_2<-cbind(mtype2_2, "Sample" = 1)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- apply(tp2[tp2$Sample==i, c(12:23)], 2, mean)
|
temp <- apply(tp2[tp2$Sample==i, c(12:23)], 2, mean)
|
||||||
temp <- data.frame("mean" = temp)
|
temp <- data.frame("mean" = temp)
|
||||||
@ -128,21 +116,17 @@ for (i in samples) {
|
|||||||
ggplot(mtype2_2) +
|
ggplot(mtype2_2) +
|
||||||
geom_line(aes(x = type, y = mean, group = Sample)) +
|
geom_line(aes(x = type, y = mean, group = Sample)) +
|
||||||
geom_point(aes(x = type, y = mean)) +
|
geom_point(aes(x = type, y = mean)) +
|
||||||
#facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
#facet_wrap(~Sample, nrow = 2, scales = "free_y") +
|
||||||
facet_wrap(~Sample, nrow = 3) +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
ggtitle("Mismatch type") +
|
ggtitle("Mismatch type") +
|
||||||
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
ggsave("../report/images_read2/mt_mean.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mt_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
rm(mtype2, mtype2_2)
|
rm(mtype2, mtype2_2)
|
||||||
|
|
||||||
#Number of deletions per sample
|
#Number of deletions per sample
|
||||||
|
|
||||||
delets2 <- tp2[tp2$Sample==1,][,10]
|
delets2 <- data.frame(matrix(ncol = 2, nrow = 0))
|
||||||
delets2 <- unlist(strsplit(delets2,""))
|
colnames(delets2)<- c("Nucleotide", "Sample")
|
||||||
delets2<- delets2[delets2 %in% c(letters, LETTERS)]
|
|
||||||
delets2<-cbind("Nucleotide"=delets2, "Sample" = 1)
|
|
||||||
delets2<-data.frame(delets2)
|
|
||||||
delets2$Nucleotide <- as.factor(delets2$Nucleotide)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,10]
|
temp <- tp2[tp2$Sample==i,][,10]
|
||||||
temp <- unlist(strsplit(temp,""))
|
temp <- unlist(strsplit(temp,""))
|
||||||
@ -153,22 +137,18 @@ for (i in samples) {
|
|||||||
delets2<-rbind(temp, delets2)
|
delets2<-rbind(temp, delets2)
|
||||||
}
|
}
|
||||||
ggplot(delets2) +
|
ggplot(delets2) +
|
||||||
geom_bar(aes(x = Nucleotide), width = 0.25, fill = "orange") +
|
geom_bar(aes(x = Nucleotide), width = 0.4, fill = "blue", alpha = 0.5, col = "black") +
|
||||||
facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
#ylim(0, 1500) +
|
#ylim(0, 1500) +
|
||||||
ggtitle ("Deletes") +
|
ggtitle ("Deletions") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/delets.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/delets.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (delets2)
|
rm (delets2)
|
||||||
|
|
||||||
#Number of insertions per sample
|
#Number of insertions per sample
|
||||||
|
|
||||||
inserts2 <- tp2[tp2$Sample==1,][,11]
|
inserts2 <- data.frame(matrix(ncol = 2, nrow = 0))
|
||||||
inserts2 <- unlist(strsplit(inserts2,""))
|
colnames(inserts2)<- c("Nucleotide", "Sample")
|
||||||
inserts2<- inserts2[inserts2 %in% c(letters, LETTERS)]
|
|
||||||
inserts2<-cbind("Nucleotide"=inserts2, "Sample" = 1)
|
|
||||||
inserts2<-data.frame(inserts2)
|
|
||||||
inserts2$Nucleotide <- as.factor(inserts2$Nucleotide)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,11]
|
temp <- tp2[tp2$Sample==i,][,11]
|
||||||
temp <- unlist(strsplit(temp,""))
|
temp <- unlist(strsplit(temp,""))
|
||||||
@ -179,84 +159,86 @@ for (i in samples) {
|
|||||||
inserts2<-rbind(temp, inserts2)
|
inserts2<-rbind(temp, inserts2)
|
||||||
}
|
}
|
||||||
ggplot(inserts2) +
|
ggplot(inserts2) +
|
||||||
ggtitle("Inserts") +
|
ggtitle("Insertions") +
|
||||||
geom_bar(aes(x = Nucleotide), width = 0.25, fill = "orange") +
|
geom_bar(aes(x = Nucleotide), width = 0.4, fill = "blue", alpha = 0.5, col = "black") +
|
||||||
facet_wrap(~Sample, nrow = 2, scales = "free_y") +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
#ylim(0, 2000) +
|
#ylim(0, 2000) +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/inserts.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/inserts.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (inserts2)
|
rm (inserts2)
|
||||||
|
|
||||||
#Mean, min and max values of insertions per sample
|
#Mean, min and max values of insertions per sample
|
||||||
|
|
||||||
insert_mean2<-subset(tp2, Sample == 1)[,7]
|
insert_mean2 <- data.frame(matrix(ncol = 7, nrow = 0))
|
||||||
insert_mean2<-cbind("Sample"=1, t(summary(insert_mean2)))
|
colnames(insert_mean2)<- c("Sample", "Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp<-subset(tp2, Mean_length_insert!=0 & Sample == i)[,7]
|
temp<-subset(tp2, Mean_length_del!=0 & Sample == i)[,7]
|
||||||
temp<-cbind("Sample"=i, t(summary(temp)))
|
temp<-cbind("Sample"=i, t(summary(temp)))
|
||||||
insert_mean2<-rbind(insert_mean2, temp)
|
insert_mean2<-rbind(insert_mean2, temp)
|
||||||
}
|
}
|
||||||
|
insert_mean2[, 2]<-as.numeric(insert_mean2[, 2])
|
||||||
|
insert_mean2[, 4]<-as.numeric(insert_mean2[, 4])
|
||||||
|
insert_mean2[, 5]<-as.numeric(insert_mean2[, 5])
|
||||||
|
insert_mean2[, 7]<-as.numeric(insert_mean2[, 7])
|
||||||
ggplot(data.frame(insert_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
ggplot(data.frame(insert_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
||||||
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
||||||
scale_y_continuous(breaks = c(0:12), limits = c(0, 13)) +
|
scale_y_continuous(name = "Length") +
|
||||||
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
||||||
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
||||||
ggtitle("Insert mean length") +
|
ggtitle("Insertion mean length") +
|
||||||
scale_x_discrete(name = "Sample") +
|
scale_x_discrete(name = "Sample") +
|
||||||
scale_y_continuous(name = "Length", breaks = seq(0, 18, 1)) +
|
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/iml.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/iml.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (insert_mean2)
|
rm (insert_mean2)
|
||||||
|
|
||||||
#Mean, min and max values of deletions per sample
|
#Mean, min and max values of deletions per sample
|
||||||
|
|
||||||
delete_mean2<-subset(tp2, Sample == 1)[,6]
|
delete_mean2 <- data.frame(matrix(ncol = 7, nrow = 0))
|
||||||
delete_mean2<-cbind("Sample"=1, t(summary(delete_mean2)))
|
colnames(delete_mean2)<- c("Sample", "Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp<-subset(tp2, Mean_length_del!=0 & Sample == i)[,6]
|
temp<-subset(tp2, Mean_length_del!=0 & Sample == i)[,6]
|
||||||
temp<-cbind("Sample"=i, t(summary(temp)))
|
temp<-cbind("Sample"=i, t(summary(temp)))
|
||||||
delete_mean2<-rbind(delete_mean2, temp)
|
delete_mean2<-rbind(delete_mean2, temp)
|
||||||
}
|
}
|
||||||
|
delete_mean2[, 2]<-as.numeric(delete_mean2[, 2])
|
||||||
|
delete_mean2[, 4]<-as.numeric(delete_mean2[, 4])
|
||||||
|
delete_mean2[, 5]<-as.numeric(delete_mean2[, 5])
|
||||||
|
delete_mean2[, 7]<-as.numeric(delete_mean2[, 7])
|
||||||
ggplot(data.frame(delete_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
ggplot(data.frame(delete_mean2), aes(x = factor(Sample), y = Mean), size = 0.5) +
|
||||||
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
geom_pointrange(aes(ymin = 0, ymax = Max.)) +
|
||||||
scale_y_continuous(breaks = c(0:22), limits = c(0, 22)) +
|
|
||||||
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
geom_point(aes(x = factor(Sample), y = 0), shape = 1) +
|
||||||
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
geom_point(aes(x = factor(Sample), y = Max.), shape = 1) +
|
||||||
ggtitle("Delete mean length") +
|
ggtitle("Deletion mean length") +
|
||||||
scale_x_discrete(name = "Sample") +
|
scale_x_discrete(name = "Sample") +
|
||||||
scale_y_continuous(name = "Length", breaks = seq(0, 12, 1)) +
|
scale_y_continuous(name = "Length") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/dml.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/dml.png", width = 2250, height = 1500, units = "px")
|
||||||
rm(delete_mean2)
|
rm(delete_mean2)
|
||||||
|
|
||||||
#Number of mismatches per sample
|
#Number of mismatches per sample
|
||||||
|
|
||||||
ggplot(tp2, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
ggplot(tp2, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
||||||
stat_summary(fun = mean, geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
stat_summary(fun = sum, geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
ggtitle("Mismatches per sample") +
|
ggtitle("Mismatches per sample") +
|
||||||
#geom_vline(xintercept = 50, linetype="dotted") +
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mps_mean.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mps_mean.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
|
|
||||||
ggplot(tp2, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
ggplot(tp2, aes(x=Sample, y=Count_SNPs, group = 1)) +
|
||||||
stat_summary(fun = sum, geom="point") +
|
stat_summary(fun = mean, geom="point") +
|
||||||
stat_summary(fun = sum, geom="line") +
|
stat_summary(fun = mean, geom="line") +
|
||||||
ggtitle("Mismatches per sample") +
|
ggtitle("Mismatches per sample") +
|
||||||
#geom_vline(xintercept = 50, linetype="dotted") +
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/mps.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/mps.png", width = 2250, height = 1500, units = "px")
|
||||||
|
|
||||||
#Number of deletions per cycle of sequence
|
#Number of deletions per cycle of sequence
|
||||||
|
|
||||||
numbers_only <- function(x) !grepl("\\D", x)
|
numbers_only <- function(x) !grepl("\\D", x)
|
||||||
|
|
||||||
dels2 <- tp2[tp2$Sample==1,][,10]
|
dels2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
dels2 <- unlist(strsplit(dels2, split="|",fixed = TRUE))
|
colnames(dels2)<-c("Num", "Sample")
|
||||||
dels2 <- unlist(strsplit(dels2,","))
|
|
||||||
dels2<- dels2[numbers_only(dels2)]
|
|
||||||
dels2<-cbind("Num"=dels2, "Sample" = 1)
|
|
||||||
dels2<-data.frame(dels2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,10]
|
temp <- tp2[tp2$Sample==i,][,10]
|
||||||
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
@ -271,22 +253,18 @@ ggplot(dels2, aes(x = Num)) +
|
|||||||
#geom_density(stat = "count", alpha = 0.5) +
|
#geom_density(stat = "count", alpha = 0.5) +
|
||||||
stat_count(geom="line", position="identity") +
|
stat_count(geom="line", position="identity") +
|
||||||
stat_count(geom="point", position="identity", size = 1) +
|
stat_count(geom="point", position="identity", size = 1) +
|
||||||
facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
facet_wrap(~Sample, nrow = 2) +
|
||||||
#ylim(0, 1500) +
|
#ylim(0, 1500) +
|
||||||
ggtitle ("Deletes per cycle") +
|
ggtitle ("Deletions per cycle") +
|
||||||
theme_bw()+
|
theme_bw()+
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/dpc.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/dpc.png", width = 2250, height = 1500, units = "px")
|
||||||
rm(dels2)
|
rm(dels2)
|
||||||
|
|
||||||
#Number of insertions per cycle of sequence
|
#Number of insertions per cycle of sequence
|
||||||
|
|
||||||
ins2 <- tp2[tp2$Sample==1,][,11]
|
ins2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
ins2 <- unlist(strsplit(ins2, split="|",fixed = TRUE))
|
colnames(ins2)<-c("Num", "Sample")
|
||||||
ins2 <- unlist(strsplit(ins2,","))
|
|
||||||
ins2<- ins2[numbers_only(ins2)]
|
|
||||||
ins2<-cbind("Num"=ins2, "Sample" = 1)
|
|
||||||
ins2<-data.frame(ins2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,11]
|
temp <- tp2[tp2$Sample==i,][,11]
|
||||||
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
@ -300,22 +278,19 @@ ins2$Num<-as.numeric(ins2$Num)
|
|||||||
ggplot(ins2, aes(x = Num)) +
|
ggplot(ins2, aes(x = Num)) +
|
||||||
stat_count(geom="line", position="identity") +
|
stat_count(geom="line", position="identity") +
|
||||||
stat_count(geom="point", position="identity", size = 1) +
|
stat_count(geom="point", position="identity", size = 1) +
|
||||||
facet_wrap(~Sample, nrow = 3, scales = "free_y") +
|
facet_wrap(~Sample, nrow = 2, scales = "free_y") +
|
||||||
#ylim(0, 1500) +
|
#ylim(0, 1500) +
|
||||||
ggtitle ("Inserts per cycle") +
|
ggtitle ("Insertions per cycle") +
|
||||||
scale_x_continuous(breaks=seq(0, 150, 25))+
|
scale_x_continuous(breaks=seq(0, 150, 25))+
|
||||||
scale_y_continuous(drop=FALSE)
|
|
||||||
theme_bw()+
|
theme_bw()+
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/ipc.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/ipc.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (ins2)
|
rm (ins2)
|
||||||
|
|
||||||
#Number of deletions per sample
|
#Number of deletions per sample
|
||||||
|
|
||||||
parse_delets2 <- tp2[tp2$Sample==1,][,10]
|
parse_delets2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
parse_delets2 <- unlist(strsplit(parse_delets2, split="|",fixed = TRUE))
|
colnames(parse_delets2)<-c("Num", "Sample")
|
||||||
parse_delets2<-cbind("Num"=parse_delets2, "Sample" = 1)
|
|
||||||
parse_delets2<-data.frame(parse_delets2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,10]
|
temp <- tp2[tp2$Sample==i,][,10]
|
||||||
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
||||||
@ -325,18 +300,16 @@ for (i in samples) {
|
|||||||
}
|
}
|
||||||
ggplot(parse_delets2, aes(x=Sample, group = 1)) +
|
ggplot(parse_delets2, aes(x=Sample, group = 1)) +
|
||||||
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
ggtitle("Deletes per sample") +
|
ggtitle("Deletions per sample") +
|
||||||
#geom_vline(xintercept = 50, linetype="dotted") +
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/dps.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/dps.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (parse_delets2)
|
rm (parse_delets2)
|
||||||
|
|
||||||
#Number of insertions per sample
|
#Number of insertions per sample
|
||||||
|
|
||||||
parse_inserts2 <- tp2[tp2$Sample==1,][,11]
|
parse_inserts2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
parse_inserts2 <- unlist(strsplit(parse_inserts2, split="|",fixed = TRUE))
|
colnames(parse_inserts2)<-c("Num", "Sample")
|
||||||
parse_inserts2<-cbind("Num"=parse_inserts2, "Sample" = 1)
|
|
||||||
parse_inserts2<-data.frame(parse_inserts2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,11]
|
temp <- tp2[tp2$Sample==i,][,11]
|
||||||
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp,split="|",fixed = TRUE))
|
||||||
@ -346,20 +319,16 @@ for (i in samples) {
|
|||||||
}
|
}
|
||||||
ggplot(parse_inserts2, aes(x=Sample, group = 1)) +
|
ggplot(parse_inserts2, aes(x=Sample, group = 1)) +
|
||||||
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
stat_count(geom="bar", width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
ggtitle("Inserts per sample") +
|
ggtitle("Insertions per sample") +
|
||||||
#geom_vline(xintercept = 50, linetype="dotted") +
|
#geom_vline(xintercept = 50, linetype="dotted") +
|
||||||
theme(plot.title = element_text(hjust = 0.5))
|
theme(plot.title = element_text(hjust = 0.5))
|
||||||
ggsave("../report/images_read2/ips.png", width = 2250, height = 1500, units = "px")
|
ggsave("../report/images_new_plasmid/ips.png", width = 2250, height = 1500, units = "px")
|
||||||
rm (parse_inserts2, temp)
|
rm (parse_inserts2, temp)
|
||||||
|
|
||||||
#Frequency of insertion motifs per sample
|
#Frequency of insertion motifs per sample
|
||||||
|
|
||||||
insert_motifs2 <- tp2[tp2$Sample==1,][,11]
|
insert_motifs2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
insert_motifs2 <- unlist(strsplit(insert_motifs2, split="|",fixed = TRUE))
|
colnames(insert_motifs2)<-c("Num", "Sample")
|
||||||
insert_motifs2 <- unlist(strsplit(insert_motifs2,","))
|
|
||||||
insert_motifs2<- insert_motifs2[!numbers_only(insert_motifs2)]
|
|
||||||
insert_motifs2<-cbind("Num"=insert_motifs2, "Sample" = 1)
|
|
||||||
insert_motifs2<-data.frame(insert_motifs2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,11]
|
temp <- tp2[tp2$Sample==i,][,11]
|
||||||
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
@ -372,19 +341,15 @@ for (i in samples) {
|
|||||||
ggplot(insert_motifs2, aes(x = Num)) +
|
ggplot(insert_motifs2, aes(x = Num)) +
|
||||||
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
facet_wrap(~Sample, nrow=4, scales = "free_y") +
|
facet_wrap(~Sample, nrow=4, scales = "free_y") +
|
||||||
ggtitle("Insert motifs per sample") +
|
ggtitle("Insertion motifs per sample") +
|
||||||
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
ggsave("../report/images_read2/imps.png", width = 2250, height = 2500, units = "px")
|
ggsave("../report/images_new_plasmid/imps.png", width = 3250, height = 2500, units = "px")
|
||||||
rm (insert_motifs2, temp)
|
rm (insert_motifs2, temp)
|
||||||
|
|
||||||
#Frequency of deletion motifs per sample
|
#Frequency of deletion motifs per sample
|
||||||
|
|
||||||
delete_motifs2 <- tp2[tp2$Sample==1,][,10]
|
delete_motifs2 <- data.frame(matrix(nrow = 0, ncol = 2))
|
||||||
delete_motifs2 <- unlist(strsplit(delete_motifs2, split="|",fixed = TRUE))
|
colnames(delete_motifs2)<-c("Num", "Sample")
|
||||||
delete_motifs2 <- unlist(strsplit(delete_motifs2,","))
|
|
||||||
delete_motifs2<- delete_motifs2[!numbers_only(delete_motifs2)]
|
|
||||||
delete_motifs2<-cbind("Num"=delete_motifs2, "Sample" = 1)
|
|
||||||
delete_motifs2<-data.frame(delete_motifs2)
|
|
||||||
for (i in samples) {
|
for (i in samples) {
|
||||||
temp <- tp2[tp2$Sample==i,][,10]
|
temp <- tp2[tp2$Sample==i,][,10]
|
||||||
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
temp <- unlist(strsplit(temp, split="|",fixed = TRUE))
|
||||||
@ -397,7 +362,7 @@ for (i in samples) {
|
|||||||
ggplot(delete_motifs2, aes(x = Num)) +
|
ggplot(delete_motifs2, aes(x = Num)) +
|
||||||
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
stat_count(geom='bar', width = 0.7, fill = "white", color = "black", alpha = 0.7) +
|
||||||
facet_wrap(~Sample, nrow=4, scales = "free_y") +
|
facet_wrap(~Sample, nrow=4, scales = "free_y") +
|
||||||
ggtitle("Deletes motifs per sample") +
|
ggtitle("Deletions motifs per sample") +
|
||||||
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
theme(plot.title = element_text(hjust = 0.5),axis.text.x = element_text(angle = 90, hjust = 1))
|
||||||
ggsave("../report/images_read2/dmps.png", width = 2250, height = 2500, units = "px")
|
ggsave("../report/images_new_plasmid/dmps.png", width = 4550, height = 2500, units = "px")
|
||||||
rm (delete_motifs2, temp)
|
rm (delete_motifs2, temp)
|
Loading…
Reference in New Issue
Block a user