### Custom CGH array handling functions ### Author : ### License : GPL3+ ### Converts a MA-list LIMMA object to a simpler data.frame limma2df <- function(object) { cgh_df <- data.frame( name = object$genes$ProbeName, M = object$M[,1], A = object$A[,1], position = object$genes$SystematicName, row = object$genes$Row, col = object$genes$Col, stringsAsFactors = FALSE ) return(cgh_df) } ### Extracts genomic coordinates from systematic names coordinates <- function(systematicNames) { # Initialization chrom <- start <- end <- rep(NA, length(systematicNames)) # Limit to mapped probes regex <- "^chr([0-9XY]+):([0-9]+)-([0-9]+)$" map <- grepl(regex, systematicNames) # Extraction chrom[map] <- sub(regex, "\\1", systematicNames[map]) start[map] <- as.integer(sub(regex, "\\2", systematicNames[map])) end[map] <- as.integer(sub(regex, "\\3", systematicNames[map])) return(data.frame(chrom=chrom, start=start, end=end, stringsAsFactors=FALSE)) } ### Replaces replicated probe M by a single distinct value, turn NA the others replicates <- function(M, probeName, fun=median, na.rm=TRUE, ...) { Mr <- tapply(X=M, INDEX=probeName, FUN=fun, na.rm=na.rm, ...) out <- rep(as.double(NA), length(M)) out[ match(names(Mr), probeName) ] <- Mr return(out) } ### Derivative Log-Ratio Spread DLRS <- function(M, chrom, position) { M <- M[ order(chrom, position) ] return(IQR(diff(M), na.rm=TRUE) / (1.349 * sqrt(2))) } ### Smooths a 2D matrix using moving window averages winSmooth <- function(mtx, w=5L) { # Add empty margins h <- matrix(as.double(NA), nrow=w, ncol=ncol(mtx)) v <- matrix(as.double(NA), ncol=w, nrow=nrow(mtx)+2*w) dat <- cbind(v, rbind(h, mtx, h), v) # Mask NAs nna <- !is.na(dat) dat[ !nna ] <- 0 # Sum in moving windows s <- matrix(0, nrow=nrow(mtx), ncol=ncol(mtx)) n <- matrix(0L, nrow=nrow(mtx), ncol=ncol(mtx)) for(i in 0:(2*w)) { message(i, "/", 2*w) for(j in 0:(2*w)) { s <- s + dat[ 1:nrow(mtx)+i , 1:ncol(mtx)+j ] n <- n + nna[ 1:nrow(mtx)+i , 1:ncol(mtx)+j ] } } # Mask NaN (0/0) and +Inf (x/0) out <- s / n out[ is.nan(out) ] <- NA out[ is.infinite(out) ] <- NA return(out) } ### Plots log-ratio according to their spatial coordinates on the array spatialPlot <- function(M, Row, Col, maxSd=2, nlevels=10L, smooth=0L, w=5L, ...) { # Log-ratio in a spatial matrix mtx <- matrix(as.double(NA), ncol=max(Col), nrow=max(Row)) mtx[ cbind(Row, Col) ] <- M # Smoothing if(smooth > 0) for(i in 1:smooth) mtx <- winSmooth(mtx, w=w) # Color palette pal <- grey(seq(from=1, to=0, length.out=nlevels)) # Range zlim <- mean(mtx, na.rm=TRUE) + c(-1*maxSd, maxSd)*sd(mtx, na.rm=TRUE) mtx[ mtx < zlim[1] ] <- zlim[1] mtx[ mtx > zlim[2] ] <- zlim[2] # Image image(x=0:max(Col), y=0:max(Row), z=t(mtx), xlab="Col", ylab="Row", col=pal, zlim=zlim, main=sprintf("M range: [ %.3f ; %.3f ]", zlim[1], zlim[2]), ...) } ### Computes and plots a moving average along a chromosome movingAverage <- function(M, position, w=100, ...) { # Sorted by position M <- M[ order(position) ] position <- position[ order(position) ] # Computation mavg <- rep(as.double(NA), length(M)) for(i in (1+w):(length(M)-w)) mavg[i] <- mean(M[(i-w):(i+w)], na.rm=TRUE) # Plot plot(x=position, y=mavg, type="l", ...) } ### Converts DNAcopy output into Rgb track track.segments <- function(seg, sampleName, organism="Human", assembly="GRCh37") { # Table object <- track.table( chrom = factor(seg$chrom, levels=c(1:22, "X", "Y")), start = seg$loc.start, end = seg$loc.end, strand = "+", probes = seg$num.mark, logRatio = seg$seg.mean, state = seg$state, .makeNames = TRUE, .name = sampleName, .organism = organism, .assembly = assembly, warn = FALSE ) # Drawing parameters object$setParam("drawFun", "draw.hist") object$setParam("column", "logRatio") object$setParam("yaxt", "s") object$setParam("ylim", c(-2, 2)) object$setParam("colorVal", NA) object$setParam( "colorFun", function() { output <- rep("#888888", nrow(slice)) output[slice$state == "amplified"] <- "#8888FF" output[slice$state == "deleted"] <- "#FF8888" return(output) } ) return(object) } ### Writes a .bed file from DNAcopy custom results ### http://genome.ucsc.edu/FAQ/FAQformat.html#format1 write.BED <- function(seg, sampleName, fileName=sprintf("%s.bed", sampleName), trackName=sampleName) { # Table rgbList <- c("deleted"="255,100,100", "normal"="150,150,150", "amplified"="100,100,255") bed <- data.frame( chrom = sprintf("chr%s", seg$chrom), chromStart = seg$loc.start, chromEnd = seg$loc.end, name = paste(sampleName, make.unique(seg$chrom), sep="#"), score = seg$num.mark, strand = "+", thickStart = seg$loc.start, thickStart = seg$loc.end, itemRgb = sub("", "", rgbList[ seg$state ]) ) # Header cat(sprintf("track name=\"%s\" description=\"CGH-array segmented by DNAcopy\" itemRgb=\"On\"\n", trackName), file=fileName) write.table(bed, file=fileName, sep=" ", dec=".", row.names=FALSE, col.names=FALSE, append=TRUE, quote=FALSE) }