Main / WikiSandbox
Go-Statistics
category | BP (2005) | MF (2005) | CC (2005) | BP (2008) | MF (2008) | CC (2008) | |
---|---|---|---|---|---|---|---|
go-ids | 8924 | 6929 | 1397 | 14659 | 8260 | 2064 | |
obsoletes | 330 | 526 | 113 | 471 | 566 | 117 | |
MetaCyc | 517 | 3460 | 0 | 680 | 3524 | 1 | |
EC | 1 | 4306 | 0 | 1 | 4762 | 0 | |
slim-generic | 51 | 41 | 36 | 53 | 42 | 36 | |
slim-plant | 52 | 28 | 29 | 51 | 27 | 27 | |
slim-yeast | 35 | 23 | 25 | 35 | 23 | 25 |
# Differential expression walkthrough comparing the "ALL1" and "E2A" samples.
#
# Inputs : tab-separated expression table fetched from the URL below
#          (one row per gene, one column per sample).
# Outputs: exprdata.pdf (all plots) and top20.tab (top 20 genes by absolute
#          log fold change among the significant ones).
# The values are treated as log2-scaled: lfc() back-transforms with 2^x.

exprdata <- read.table(
  "http://goblet.molgen.mpg.de/data/R/ALLs-data.tab",
  header = TRUE
)
head(exprdata)
summary(exprdata)

# Keep only the sample columns whose names contain "ALL1" or "E2A".
cnames <- colnames(exprdata)
all1 <- grep("ALL1", cnames)
e2a <- grep("E2A", cnames)
exprdata <- exprdata[, c(all1, e2a)]
head(exprdata)

# The column positions changed after subsetting, so recompute the indices.
cnames <- colnames(exprdata)
all1 <- grep("ALL1", cnames)
e2a <- grep("E2A", cnames)

# Per-sample colour / plotting symbol code: 4 for ALL1 columns, 2 for E2A.
cols <- rep(4, length(cnames))
cols[e2a] <- 2
cols

# Everything plotted from here until dev.off() goes into the PDF.
pdf("exprdata.pdf")
boxplot(exprdata, col = cols)

# Hierarchical clustering of the samples: 2 distance measures x 2 linkages.
par(mfrow = c(2, 2), mai = c(0.5, 0.5, 0.5, 0.5))
# The method strings are also used verbatim in the plot titles.
for (mdist in c("manhattan", "euclidian")) {
  for (mclust in c("single", "complete")) {
    plot(
      hclust(dist(t(exprdata), method = mdist), method = mclust),
      main = paste(mclust, mdist)
    )
  }
}
par(mfrow = c(1, 1))

# p-value of a two-sample t-test (ALL1 vs E2A) for one gene.
# `row` is one row of the expression table; uses the global indices
# `all1` and `e2a`.
mtest <- function(row) {
  t.test(row[all1], row[e2a])$p.value
}

# log2 fold change of the group medians (ALL1 over E2A) for one gene,
# computed on the back-transformed (2^x) values.
lfc <- function(row) {
  log2(median(2^row[all1]) / median(2^row[e2a]))
}

# Restrict testing to genes with an inter-quartile range above 0.9.
idx <- which(apply(exprdata, 1, IQR) > 0.9)
pvals <- apply(exprdata[idx, ], 1, mtest)
head(pvals)
adj <- p.adjust(pvals, method = "BH")

# NOTE: as in the original script, `lfc` is rebound here from the helper
# function to the per-gene result vector; later code uses the vector.
lfc <- apply(exprdata[idx, ], 1, lfc)

res.df <- data.frame(pval = pvals, adjp = adj, lfc = lfc, abs = abs(lfc))
head(res.df)

# Significant genes: adjusted p-value < 0.05 and |log2 fold change| > 1.
diff.genes <- with(res.df, res.df[adjp < 0.05 & abs > 1, ])
dim(diff.genes)

# Ascending order of 1/abs puts the largest absolute fold changes first.
top20 <- head(diff.genes[order(1 / diff.genes$abs), ], n = 20)
top20

heatmap(as.matrix(exprdata[rownames(top20), ]))
plot(pvals ~ lfc, pch = "+")
hist(pvals, main = "plot rawp")
write.table(top20, file = "top20.tab", quote = FALSE, sep = "\t")

# PCA ----
opar <- par(
  mfrow = c(3, 3),
  omi = c(0.2, 0.2, 0.5, 0.2),
  mai = c(0.1, 0.1, 0.1, 0.1),
  las = 1
)
pca.res <- prcomp(t(exprdata))

# Draw a 3 x 3 panel for the first three principal components.
#
# Off-diagonal panels are scatter plots of PC i against PC j; diagonal
# panels show the component label and its proportion of variance.
#
# mtitle     : outer title written above the panel.
# pca        : a prcomp result; defaults to the global `pca.res`.
# point.cols : per-sample colour / symbol codes; defaults to global `cols`.
plotPCA <- function(mtitle, pca = pca.res, point.cols = cols) {
  # Row 2 of the importance matrix is the proportion of variance.
  pca.vars <- summary(pca)$importance[2, ]
  pca.xx <- pca$x
  for (i in 1:3) {
    for (j in 1:3) {
      if (i == j) {
        # Empty frame used only as a canvas for the text label.
        plot(-1:1, -1:1, col = "white", pch = 4, xlab = "", ylab = "",
             yaxt = "n", xaxt = "n", main = "")
        text(0, 0, paste("PC", j, "\n", format(pca.vars[i], digits = 3)))
      } else {
        plot(pca.xx[, i], pca.xx[, j], col = point.cols, pch = point.cols)
      }
    }
  }
  mtext(mtitle, 3, outer = TRUE)
}

plotPCA("all genes")

# Second PCA on the differentially expressed genes only. The original used
# rownames(res.df), i.e. every IQR-filtered gene, which did not match the
# "diff genes" title. Three components need at least three genes.
if (nrow(diff.genes) >= 3) {
  pca.res <- prcomp(t(exprdata[rownames(diff.genes), ]))
  plotPCA("diff genes")
} else {
  message("fewer than 3 differential genes; skipping 'diff genes' PCA")
}

par(opar)
dev.off()