AffyArray HowTo

From TaejoonLab
Jump to: navigation, search

Installation

> source("https://bioconductor.org/biocLite.R")
> biocLite()
> biocLite("affy")
> biocLite("affyPLM")
> biocLite("limma")

Preprocessing

> data_name <- 'my_array'
> filename_tbl <- paste(data_name, 'txt', sep='.')

> exp_tbl <- read.table(file=filename_tbl, header=T, stringsAsFactors=FALSE, sep="\t")
> filenames_vector <- as.vector(exp_tbl$Filename, mode='character')
> samples_vector <- as.vector(exp_tbl$Sample, mode='character')

> library(affy)
> affy_raw <- ReadAffy(filenames=filenames_vector, sampleNames=samples_vector)
> affy_corrected <- bg.correct(affy_raw, method='rma')
> affy_normalized <- normalize(affy_corrected, method='quantiles')
> eset_rma <- rma(affy_normalized)
> write.exprs(eset_rma, file=paste(data_name, 'eset_rma', 'txt', sep='.'))

Boxplot

After running 'Preprocessing',

> png(filename=paste(data_name,"box_raw", "png", sep='.'), width=1024, height=640)
> par(mar=c(10,4,4,2))
> boxplot( as.data.frame(log2(intensity(affy_raw))), las=2, mex=0.1,
          main=paste(data_name,'(RAW)',sep=''), ylab="log2 intensity")
> dev.off()

> png(filename=paste(data_name,".box_rma", "png", sep='.'), width=1024, height=640)
> par(mar=c(10,4,4,2))
> boxplot( as.data.frame(exprs(eset_rma)), las=2, mex=0.1, 
          main=paste(data_name,"(normalized - RMA)"), ylab="log2 normalized intensity" )
> dev.off()

Hierarchical clustering

> eset_t_mat <- t(as.matrix(as.data.frame(eset_rma)))
> corr_spearman <- dist(1 - cor(eset_t_mat, method='spearman'))
> png(filename=paste(data_name,"SpR_clust", "png",sep='.'), width=1024,height=640)
> plot(hclust( corr_spearman, method="average"), main=paste(data_name,":hclust_SpR_average",sep=''))
> dev.off()

> corr_pearson <- dist(1 - cor(eset_t_mat, method='pearson'))
> png(filename=paste(data_name,"CC_clust", "png",sep='.'), width=1024,height=640)
> plot(hclust( corr_pearson, method="average"), main=paste(data_name,":hclust_CC_average",sep=''))
> dev.off()

> png(filename=paste(data_name,"EucDist_clust", "png",sep='.'), width=1024,height=640)
> plot(hclust( dist(t(eset_t_mat), method='euclidean'), method="average"), main=paste(data_name,":hclust_EucDist_average",sep=''))
> dev.off()

PLM & RLE plot

library(affyPLM)

affy_raw_PLM <- fitPLM(affy_raw)
sample_names <- sampleNames(affy_raw)

for(i in 1:length(sample_names)) {
  filename_img <- paste(sample_names[i],".PLM_resids.png", sep="")
  png(filename=filename_img, width=1024, height=1024);
  image( affy_raw_PLM, which=i, type="resids", add.legend=TRUE )
  dev.off()
}

png(filename=paste(data_name,'.RLE.png',sep=''), width=1024,height=800)
par(mar=c(10,4,4,2))
RLE(affy_raw_PLM, fin=c(1024, 600), las=2, mex=0.1,
    main=paste("Relative Log Expression(RLE) for ",data_name, sep=''))
dev.off()

DE with limma

library(limma)

group <- factor(exp_tbl$Group, levels=c('GroupA','GroupB'))
design_group <- model.matrix(~0+group)
colnames(design_group) <- c(levels(group))

fit_group <- lmFit(eset_rma, design_group)
fit_group <- eBayes(fit_group)

contrast_BA <- makeContrasts(GroupB-GroupA,levels=design_group)
contrast_fit_BA <- contrasts.fit(fit_group, contrast_BA)
contrast_fit_BA <- eBayes(contrast_fit_BA)

diff_BA <- topTable(contrast_fit_BA, n=nrow(contrast_fit_BA),coef=1, adjust="fdr", resort.by="logFC")
write.table(diff_BA , paste(data_name,'top.txt',sep='.'), sep='\t')