# using_R_in_data_analysis.R # Author: Xiuxia Du # Started in April 2016. # Modified in July 2016. # ---------------------------------------------------------------------------------- # Initialization # ---------------------------------------------------------------------------------- rm(list=ls()) # clear the working space graphics.off() # close all figures # folder to store r packages path_to_R_libs <- "/Users/xdu4/Documents/Duxiuxia/R_libs" # folder to store raw test datasets path_to_raw_data <- "/Users/xdu4/Documents/Duxiuxia/UAB/XCMS/faahKO" # set up the working directory setwd("/Users/xdu4/Documents/Duxiuxia/UAB/XCMS/AnalysisResults") # ---------------------------------------------------------------------------------- # Get the test raw data and intall the XCMS package # ---------------------------------------------------------------------------------- source("https://bioconductor.org/biocLite.R") biocLite("faahKO") # The data will be stored in the "library/faahKO/cdf/" folder in your R home directory. # You can find where your R home directory is by running "R.home()" in your R console in RStudio. R.home() # Move the raw faahKO data to your "path_to_raw_data" that was specified in the Initialization stage. biocLite("xcms") # install the xcms package library(xcms) # load the xcms package # ---------------------------------------------------------------------------------- # Data pre-processing by XCMS # ---------------------------------------------------------------------------------- # 1. Detect peaks/features in EICs # 1.1 obtain the list of all of the raw files all_raw_files <- list.files(path_to_raw_data, recursive=T, full.names=T) # 1.2 Peak picking using the centWave method xset <- xcmsSet(all_raw_files, method="centWave", ppm=500, peakwidth=c(20, 120), snthresh=10, prefilter=c(3,400), mzCenterFun="wMean", integrate=1, mzdiff=-0.001, fitgauss=F, noise=100, sleep=0) # What do these arguments mean? Let's find out: ?findPeaks.centWave # 1.3 Examine the xset object str(xset, max.level = 2) head(xset@peaks) # To get information about each field in the xset object, do ?xcmsSet # and then click on "xcmsSet-class" at the end of the Help page # plot EICs II <- which(xset@peaks[,"mz"]>=526.1 & xset@peaks[,"mz"]<=526.3 & xset@peaks[,"sample"]==1) my_rtrange <- matrix(c(rep(3000, length(II)), rep(3360, length(II))), ncol=2, byrow=F) EICs <- getEIC(xset, mzrange=xset@peaks[II, c("mzmin", "mzmax")], rtrange=my_rtrange) plot(EICs) # 1.4 Information on the peaks are stored in the matrix xset@peaks. # Export peaks matrix for easy examination in Excel. out_file_name <- "peaks_after_peakpicking.csv" write.csv(x=xset@peaks, file=out_file_name, row.names=F) # open the .csv file in your working directory to take a look # 2. Group peaks from different samples together # 2.1 Group peaks grouping_method <- "density" xset1 <- group(object=xset, method=grouping_method, bw=30, minfrac=0.5, minsamp=1, mzwid=0.25, max=50, sleep=0) # What do these arguments mean? Let's find out: ??xcms::group.density # to get the list of grouping methods getOption("BioC")$xcms$group.methods # 2.2 Examine xset1 str(xset1, max.level = 2) # xset@groups and xset@groupidx have been filled by the group() function. colnames(xset1@groups) # summary of each peak group xset1@groupidx[[1]] # groupidx is a list. # Each list contains the indices of peaks in each peak group. # Here index is the row index in the xset@peaks matrix. # The length of the groupidx is, of course, the total number of peak groups. # 2.3 Export the summary information for all of the peak groups out_file_name <- "groups_1.csv" write.csv(x=xset1@groups, file=out_file_name, row.names=F) # 3. Alignment # 3.1 Align xset2 <- retcor(xset1, method="obiwarp", plottype="deviation", profStep=1, distFun="cor_opt", gapInit=NULL, gapExtend=NULL, factorDiag=2, factorGap=1, localAlignment=0, initPenalty=0) # What do these arguments mean? Let's find out: ??xcms::retcor.obiwarp # 3.2 Examine xset2 str(xset2, max.level = 2) # retention time before alignment xset@rt$raw[[1]][1:5] xset@rt$corrected[[1]][1:5] # same retention time # retention time after alignment xset2@rt$raw[[1]][1:5] xset2@rt$corrected[[1]][1:5] # retention time has been corrected # amount of retention time shift for all of the peaks rt_shift_all_peaks <- xset2@peaks[,"rt"] - xset@peaks[,"rt"] plot(sort(rt_shift_all_peaks), pch=16, cex=0.5, xlab="peak index", ylab="rt deviation") # amount of retention time shift for all of the scans file_id <- 1 rt_shift_all_scan <- xset2@rt$raw[[file_id]] - xset2@rt$corrected[[file_id]] plot(rt_shift_all_scan, pch=16, cex=0.5, xlab="scan number", ylab="rt deviation") # 3.3 Export the peaks out_file_name <- "peaks_after_alignment.csv" write.csv(x=xset2@peaks, file=out_file_name, row.names=F) # 4. Second grouping xset3 <- group(object=xset2, method=grouping_method, bw=30, minfrac=0.5, minsamp=1, mzwid=0.25, max=50, sleep=0) # 5. fill peaks xset4 <- fillPeaks(xset3, method="chrom") out_file_name <- "final_peaks.csv" write.csv(x=xset4@peaks, file=out_file_name, row.names=F) # 6. feature annotation xset_annotated <- annotate(xset4) peaklist <- getPeaklist(xset_annotated) write.csv(peaklist, file="final_peaks_annotated.csv") # take a look at the annotated peaks # rule tables file1 <- system.file('rules/primary_adducts_pos.csv', package = "CAMERA") file2 <- system.file('rules/primary_adducts_neg.csv', package = "CAMERA") file3 <- system.file('rules/extended_adducts_pos.csv', package = "CAMERA") file4 <- system.file('rules/extended_adducts_neg.csv', package = "CAMERA") # ---------------------------------------------------------------------------------- # More details on feature annotation # ---------------------------------------------------------------------------------- # 1. feature annotation of a single file biocLite("CAMERA") # install the xcms package library(CAMERA) file_name <- "/Users/xdu4/Documents/Duxiuxia/UAB/XCMS/faahKO/KO/ko15.cdf" xs <- xcmsSet(file_name, method="centWave", ppm=500, peakwidth=c(20, 120), snthresh=10, prefilter=c(3,400), mzCenterFun="wMean", integrate=1, mzdiff=-0.001, fitgauss=F, noise=100, sleep=0) # Create an xsAnnotate object an <- xsAnnotate(xs) # Group based on RT anF <- groupFWHM(an, perfwhm=0.6) # Annotate isotopes anI <- findIsotopes(anF, mzabs=0.01) # Verify grouping anIC <- groupCorr(anI, cor_eic_th = 0.75) # Annotate adducts anFA <- findAdducts(anIC, polarity="positive") plotEICs(anFA, pspec=2, maxlabel = 5) plotPsSpectrum(anFA, pspec=2, maxlabel=5) peaklist <- getPeaklist(anFA) write.csv(peaklist, file="xs_annotated.csv") # or combine the above steps into one step anFA_one_step <- annotate(xs) # ---------------------------------------------------------------------------------- # More on XCMS # ---------------------------------------------------------------------------------- library(help=xcms) getOption("BioC")