|
| 1 | +# Annotation: Hands-On Exercise |
| 2 | + |
| 3 | +Now it's your turn to try! |
| 4 | + |
| 5 | + |
| 6 | +## The Code |
| 7 | + |
| 8 | +Here's some code that hasn't been annotated particularly well. |
| 9 | + |
| 10 | +```{r} |
| 11 | +library(qqman) |
| 12 | +library(beepr) |
| 13 | +
|
| 14 | +
|
| 15 | +# The list of datasets |
| 16 | +
|
| 17 | +datasets=c("fivehmc.glmmtmb.all.cing","fivehmc.glmmtmb.all.pari","fivemc.glmmtmb.cingulate","fivemc.glmmtmb.parietal") |
| 18 | +
|
| 19 | +mod=c("5hmc","5hmc","5mc","5mc") # For labeling purposes |
| 20 | +tissues=rep(c("Cingulate","Parietal"),2) # For labeling purposes |
| 21 | +
|
| 22 | +if(dir.exists(paste0(home,"/ManhattanPlots"))==FALSE){ # For storing the graphs |
| 23 | + dir.create(paste0(home,"/ManhattanPlots")) |
| 24 | +} |
| 25 | +tissues.f=c("Cingulate","Cingulate","Parietal","Parietal") |
| 26 | +stage=c("limbic","neocortical","limbic","neocortical") |
| 27 | +
|
| 28 | +fdr.p=data.frame(t(c(rep(0,4)))) |
| 29 | +colnames(fdr.p)=c("FC","HolP","tissues","stage") |
| 30 | +fdr.p=fdr.p[-1,] |
| 31 | +
|
| 32 | +for(ii in 1:length(fdr.files)){ |
| 33 | +xx=read.table(paste0(home,"/",fdr.files[ii]),row.names=1,skip=1) |
| 34 | +xx=cbind(xx,rep(tissues.f[ii],nrow(xx)),rep(stage[ii],nrow(xx))) |
| 35 | +colnames(xx)=c("FC","HolP","tissues","stage") |
| 36 | +fdr.p=rbind(fdr.p,xx) |
| 37 | +} |
| 38 | +
|
| 39 | +
|
| 40 | +for(ii in 1:length(datasets)){ |
| 41 | +data=eval(parse(text=datasets[ii])) |
| 42 | +
|
| 43 | +probes=rownames(data) |
| 44 | +
|
| 45 | +# Match the probe names |
| 46 | +if(ii<3){ |
| 47 | +yy=fdr.p[which(fdr.p[,3]==tissues[ii]),] |
| 48 | +yy=yy[match(probes,rownames(yy)),] |
| 49 | +yy=yy[which(!is.na(yy[,1])),] |
| 50 | +
|
| 51 | +yy.l=yy[which(yy[,4]=="limbic"),] |
| 52 | +yy.n=yy[which(yy[,4]=="neocortical"),] |
| 53 | +
|
| 54 | + probes.l=rownames(yy.l) |
| 55 | +}else{ |
| 56 | + probes.l=rownames(data) |
| 57 | +} |
| 58 | +
|
| 59 | +xx.l=match(probes.l,EPIC.manifest@ranges@NAMES) |
| 60 | +
|
| 61 | +chrs.l=EPIC.manifest@elementMetadata@listData$chrmA[xx.l] |
| 62 | +#manhattan function requires chromosomes to be noted as numeric vector with X, Y, and MT chrs being 23:25 respectively. |
| 63 | +chrs.l=gsub("chr","",chrs.l) |
| 64 | +chrs.l=gsub("X",23,chrs.l) |
| 65 | +chrs.l=gsub("Y",24,chrs.l) |
| 66 | +chrs.l=gsub("MT",25,chrs.l) |
| 67 | +
|
| 68 | +if(ii<3){ |
| 69 | +probes.n=rownames(yy.n) |
| 70 | +xx.n=match(probes.n,EPIC.manifest@ranges@NAMES) |
| 71 | +
|
| 72 | +chrs.n=EPIC.manifest@elementMetadata@listData$chrmA[xx.n] |
| 73 | +#manhattan function requires chromosomes to be noted as numeric vector with X, Y, and MT chrs being 23:25 respectively. |
| 74 | +chrs.n=gsub("chr","",chrs.n) |
| 75 | +chrs.n=gsub("X",23,chrs.n) |
| 76 | +chrs.n=gsub("Y",24,chrs.n) |
| 77 | +chrs.n=gsub("MT",25,chrs.n) |
| 78 | +
|
| 79 | +# Make a dataframe |
| 80 | +manh.data.l=data.frame(probes.l, |
| 81 | + as.numeric(chrs.l), |
| 82 | + EPIC.manifest@ranges@start[xx.l], |
| 83 | + -log10(yy.l[,2]), |
| 84 | + stringsAsFactors = FALSE) |
| 85 | +
|
| 86 | +manh.data.n=data.frame(probes.n, |
| 87 | + as.numeric(chrs.n), |
| 88 | + EPIC.manifest@ranges@start[xx.n], |
| 89 | + -log10(yy.n[,2]), |
| 90 | + stringsAsFactors = FALSE) |
| 91 | +colnames(manh.data.l)=colnames(manh.data.n)=c("CPG","CHR","BP","P") |
| 92 | +
|
| 93 | +}else{ |
| 94 | + manh.data.l=data.frame(probes.l, |
| 95 | + as.numeric(chrs.l), |
| 96 | + EPIC.manifest@ranges@start[xx.l], |
| 97 | + -log10(data$LimbicVSNoneFDR), |
| 98 | + -log10(data$NeocorticalVSNoneFDR), |
| 99 | + stringsAsFactors = FALSE) |
| 100 | + colnames(manh.data.l)=c("CPG","CHR","BP","P","NP") |
| 101 | + manh.data.l=manh.data.l[-which(manh.data.l$NP==Inf),] |
| 102 | + |
| 103 | + xx=unique(which(is.nan(manh.data.l$NP)),which(is.nan(manh.data.l$P))) |
| 104 | + if(length(xx)>0){ |
| 105 | + manh.data.l=manh.data.l[-xx,] |
| 106 | + } |
| 107 | +} |
| 108 | +
|
| 109 | +# Label them as such. |
| 110 | +
|
| 111 | +manh.data.l=manh.data.l[-which(manh.data.l$P==Inf),] |
| 112 | +manh.data.l$CPG=as.character(manh.data.l$CPG) # CPG's need to be a character vector |
| 113 | +manh.data.l$CHR=as.numeric(manh.data.l$CHR) # Chromsomal locations need to be numeric |
| 114 | +manh.data.l$BP= as.numeric(manh.data.l$BP) |
| 115 | +manh.data.l=manh.data.l[which(!is.na(manh.data.l$CHR)),] |
| 116 | +
|
| 117 | +if(ii<3){ |
| 118 | +manh.data.n=manh.data.n[-which(manh.data.n$P==Inf),] |
| 119 | +manh.data.n$CPG=as.character(manh.data.n$CPG) # CPG's need to be a character vector |
| 120 | +manh.data.n$CHR=as.numeric(manh.data.n$CHR) # Chromsomal locations need to be numeric |
| 121 | +manh.data.n$BP= as.numeric(manh.data.n$BP) |
| 122 | +manh.data.n=manh.data.n[which(!is.na(manh.data.n$CHR)),] |
| 123 | +
|
| 124 | +} |
| 125 | +
|
| 126 | +# Manhattan Plot for Limbic data |
| 127 | +jpeg(paste0(home,"/ManhattanPlots/",datasets[ii],"MidManhattan2.jpeg")) |
| 128 | +if(ii<3){ |
| 129 | +sig=manh.data.l$CPG[which(manh.data.l$P>3)] |
| 130 | +manhattan(manh.data.l,chr="CHR",bp="BP",p="P",snp="CPG",logp=F,ylim=c(0,round(max(manh.data.l$P))+10),highlight=sig,chrlabs=c(1:22, "X", "Y"),suggestiveline=FALSE,genomewideline=TRUE,main=paste( mod[ii],tissues[ii],"Mid Stage Disease")) |
| 131 | +}else{ |
| 132 | +sig=manh.data.l$CPG[which(manh.data.l$P>3)] |
| 133 | +manhattan(manh.data.l,chr="CHR",bp="BP",p="P",snp="CPG",ylim=c(0,round(max(manh.data.l$P[!is.na(manh.data.l$P)]))),logp=F,highlight=sig,chrlabs=c(1:22, "X", "Y"),suggestiveline=FALSE,genomewideline=TRUE,main=paste( mod[ii],tissues[ii],"Mid Stage Disease")) |
| 134 | +} |
| 135 | +# highlight probes |
| 136 | +abline(h=-log10(.001),col="red") |
| 137 | +dev.off() |
| 138 | +
|
| 139 | +# Manhattan Plot for Neocortical data |
| 140 | +jpeg(paste0(home,"/ManhattanPlots/",datasets[ii],"LateManhattan2.jpeg")) |
| 141 | +if(ii<3){ |
| 142 | + sig=manh.data.n$CPG[which(manh.data.n$P>3)] |
| 143 | + manhattan(manh.data.n,chr="CHR",bp="BP",p="P",snp="CPG",ylim=c(0,round(max(manh.data.l$P))+10),logp=F,highlight=sig,chrlabs=c(1:22, "X", "Y"),suggestiveline=FALSE,genomewideline=TRUE,main=paste(mod[ii],tissues[ii],"Late Stage Disease")) |
| 144 | +}else{ |
| 145 | + sig=manh.data.l$CPG[which(manh.data.l$NP>3)] |
| 146 | + manhattan(manh.data.l,chr="CHR",bp="BP",p="NP",snp="CPG",ylim=c(0,round(max(manh.data.l$NP))),logp=F,highlight=sig,chrlabs=c(1:22, "X", "Y"),suggestiveline=FALSE,genomewideline=TRUE,main=paste(mod[ii],tissues[ii],"Late Stage Disease")) |
| 147 | +} |
| 148 | + abline(h=-log10(.001),col="red") |
| 149 | + dev.off() |
| 150 | +
|
| 151 | +
|
| 152 | +} |
| 153 | +# Just a nifty way to signal that your graphs are finished being made. |
| 154 | +beep(sound=2) |
| 155 | +``` |
| 156 | + |
| 157 | +## Questions |
| 158 | + |
| 159 | +1. Create a README file for this code. Make sure that it includes general purpose of the project, instructions on how to re-run the project, any software required by the project, both input and output file descriptions, and descriptions of any additional tools included in the project. |
| 160 | + |
| 161 | +2. How can the annotation for this section be improved? |
| 162 | + |
| 163 | +``` |
| 164 | +# Make a dataframe |
| 165 | +manh.data.l=data.frame(probes.l, |
| 166 | + as.numeric(chrs.l), |
| 167 | + EPIC.manifest@ranges@start[xx.l], |
| 168 | + -log10(yy.l[,2]), |
| 169 | + stringsAsFactors = FALSE) |
| 170 | +
|
| 171 | +manh.data.n=data.frame(probes.n, |
| 172 | + as.numeric(chrs.n), |
| 173 | + EPIC.manifest@ranges@start[xx.n], |
| 174 | + -log10(yy.n[,2]), |
| 175 | + stringsAsFactors = FALSE) |
| 176 | +colnames(manh.data.l)=colnames(manh.data.n)=c("CPG","CHR","BP","P") |
| 177 | +``` |
| 178 | + |
| 179 | +```{r} |
| 180 | +devtools::session_info() |
| 181 | +``` |
0 commit comments