### R code from vignette source 'src/sec10_smallCellNumber.Rnw'

###################################################
### code chunk number 1: smallCellNumber1
###################################################
library("HD2013SGI")

dir.create(file.path("result","Figures"),recursive=TRUE,showWarnings=FALSE)

data("featuresPerWell", package="HD2013SGI")
data("TargetAnnotation",package="HD2013SGI")
data("QueryAnnotation",package="HD2013SGI")
data("datamatrix", package="HD2013SGI")


###################################################
### code chunk number 2: smallCellNumber2
###################################################
NROW = 15
NCOL = 23
NFIELD = 4


###################################################
### code chunk number 3: smallCellNumber3
###################################################
plates = featuresPerWell$Anno[seq(1,nrow(featuresPerWell$Anno),
                                  by=NFIELD*NCOL*NROW),"plate"]
PlateAnnotation = HD2013SGI:::parsePlateBarcodes(plates)


###################################################
### code chunk number 4: smallCellNumber4
###################################################
S = which(PlateAnnotation$queryGroup=="sample")
tdnames = unique(PlateAnnotation$targetDesign[S])
qnames = unique(PlateAnnotation$queryGene[S])
qdnames = unique(PlateAnnotation$queryDesign[S])
repnames = unique(PlateAnnotation$replicate[S])


###################################################
### code chunk number 5: smallCellNumber5
###################################################
D = array(0.0, dim=c(field=NFIELD,col=NCOL,row=NROW,
                     features=dim(featuresPerWell$data)[2],
                     targetDesign=length(tdnames),
                     query=length(qnames),queryDesign=length(qdnames),
                     replicate=length(repnames)))
dimnames(D) = list(field=seq_len(NFIELD),
                   col=seq_len(NCOL),row=LETTERS[seq_len(NROW)+1],
                   features=dimnames(featuresPerWell$data)[[2]],
                   targetDesign=tdnames,
                   queryGene=qnames,queryDesign=qdnames,replicate=repnames)
z=0
for (td in tdnames) {
  for (q in qnames) {
    for (qd in qdnames) {
      for (r in repnames) {
        plate = PlateAnnotation$plate[
                      which((PlateAnnotation$targetDesign == td) &
          (PlateAnnotation$queryGene == q) &
          (PlateAnnotation$queryDesign == qd) &
          (PlateAnnotation$replicate == r) ) ]
        z=z+1
        I = which(featuresPerWell$Anno$plate == plate)
        D[,,,,td,q,qd,r] = as.vector(featuresPerWell$data[I,])
      }
    }
  }
}
D[is.na(D)] = 0.0


###################################################
### code chunk number 6: smallCellNumber6
###################################################
Dsub1 = D[1,,,,,,,]
Dsub2 = (D[1,,,,,,,] + D[2,,,,,,,])/2
D = (D[1,,,,,,,] + D[2,,,,,,,] + D[3,,,,,,,] + D[4,,,,,,,])/4

D = aperm(D,c(1,2,4,5,6,3,7))
dn = dimnames(D)
dim(D) = c(prod(dim(D)[1:2]),dim(D)[3:7])
dimnames(D) = c(list(targetGene =
                       sprintf("%s%d",rep(LETTERS[seq_len(NROW)+1],each=NCOL),
                                      rep(seq_len(NCOL),times=NROW))),
                   dn[3:7])

Dsub1 = aperm(Dsub1,c(1,2,4,5,6,3,7))
dim(Dsub1) = c(prod(dim(Dsub1)[1:2]),dim(Dsub1)[3:7])
dimnames(Dsub1) = dimnames(D)

Dsub2 = aperm(Dsub2,c(1,2,4,5,6,3,7))
dim(Dsub2) = c(prod(dim(Dsub2)[1:2]),dim(Dsub2)[3:7])
dimnames(Dsub2) = dimnames(D)

datamatrixfullsub = list(Dsub1 = Dsub1, Dsub2 = Dsub2, Dsub4 = D)


###################################################
### code chunk number 7: smallCellNumber7
###################################################
IndexSAMPLE = which(TargetAnnotation$group == "sample")
IndexCTRL = which(TargetAnnotation$group == "SingleKDctrl")
IndexNEG = which(TargetAnnotation$group == "negctrl")
IndexSAMPLENEG = c(IndexSAMPLE, IndexNEG)


###################################################
### code chunk number 8: smallCellNumber4
###################################################
colCTRL = rep("gray", nrow(TargetAnnotation))
colCTRL[TargetAnnotation$group == "SingleKDctrl"] = "royalblue"
colCTRL[TargetAnnotation$group == "negctrl"] = "red"


###################################################
### code chunk number 9: SCNTransformationAndNormalization1
###################################################
logtrafo <- function(x,c) {
  log2((x+sqrt(x^2+c^2))/2)
}


###################################################
### code chunk number 10: SCNTransformationAndNormalization2
###################################################
for (i in seq_len(dim(D)[5])) {
  m = quantile(D[,,,,i,],probs=0.03,na.rm=TRUE)
  D[,,,,i,] = logtrafo(D[,,,,i,],m)
  Dsub1[,,,,i,] = logtrafo(Dsub1[,,,,i,],m)
  Dsub2[,,,,i,] = logtrafo(Dsub2[,,,,i,],m)
}


###################################################
### code chunk number 11: SCNTransformationAndNormalization3
###################################################
normalize <- function(D) {
  M = apply(D,c(2:6),median,na.rm=TRUE)
  M2 = apply(M, c(2,4), mean)
  M2 = rep(M2, times=8)
  dim(M2) = dim(M)[c(2,4,1,3,5)]
  M2 = aperm(M2,c(3,1,4,2,5))
  M = M - M2
  M = rep(M[], each=dim(D)[1])
  dim(M) = dim(D)
  D = D - M
  D
}

D <- normalize(D)
Dsub1 <- normalize(Dsub1)
Dsub2 <- normalize(Dsub2)


###################################################
### code chunk number 12: SCNTransformationAndNormalization4
###################################################
for (i in 1:dim(D)[5]) {
  D[,,,,i,] = (D[,,,,i,] - median(D[,,,,i,],na.rm=TRUE)) /
    mad(D[,,,,i,],na.rm=TRUE)
}
for (i in 1:dim(Dsub1)[5]) {
  Dsub1[,,,,i,] = (Dsub1[,,,,i,] - median(Dsub1[,,,,i,],na.rm=TRUE)) /
    mad(Dsub1[,,,,i,],na.rm=TRUE)
}
for (i in 1:dim(Dsub2)[5]) {
  Dsub2[,,,,i,] = (Dsub2[,,,,i,] - median(Dsub2[,,,,i,],na.rm=TRUE)) /
    mad(Dsub2[,,,,i,],na.rm=TRUE)
}


###################################################
### code chunk number 13: SCNqualityControlFeatures1
###################################################
C = rep(NA_real_,dim(D)[5])
D2 = (D[,1,,1,,] + D[,2,,1,,] + D[,1,,2,,] + D[,2,,2,,]) / 4
D2sub1 = (Dsub1[,1,,1,,] + Dsub1[,2,,1,,] + Dsub1[,1,,2,,] + Dsub1[,2,,2,,]) / 4
D2sub2 = (Dsub2[,1,,1,,] + Dsub2[,2,,1,,] + Dsub2[,1,,2,,] + Dsub2[,2,,2,,]) / 4
for (i in 1:dim(D)[5]) {
  C[i] = cor(as.vector(D2[IndexSAMPLE,,i,1]),as.vector(D2[IndexSAMPLE,,i,2]))
}


###################################################
### code chunk number 14: SCNqualityControlFeatures5
###################################################
I = which(C >= 0.6)
D = D[,,,,I,,drop=FALSE]
Dsub1 = Dsub1[,,,,I,,drop=FALSE]
Dsub2 = Dsub2[,,,,I,,drop=FALSE]
dim(D)


###################################################
### code chunk number 15: SCNqualityControlsiRNA1
###################################################
D1 = (D[,,,1,,1] + D[,,,1,,2] + D[,,,2,,1] + D[,,,2,,2])/4


###################################################
### code chunk number 16: SCNqualityControlsiRNA2
###################################################
Cdesign1 = rep(NA_real_,dim(D)[1])
for (k in 1:dim(D)[5]) {
  for (i in 1:dim(D)[1]) {
    Cdesign1[i] = cor(as.vector(D1[i,1,,k]),as.vector(D1[i,2,,k]))
  }
  if ( k == 1) {
    Cdesign1all = Cdesign1
  } else {
    Cdesign1all = Cdesign1all + Cdesign1
  }
}
Cdesign1all = Cdesign1all / dim(D)[5]


###################################################
### code chunk number 17: SCNqualityControlsiRNA4
###################################################
I = IndexSAMPLE[Cdesign1all[IndexSAMPLE] >= 0.7]
I = c(I, IndexNEG)
D = D[I,,,,,,drop=FALSE]
Dsub1 = Dsub1[I,,,,,,drop=FALSE]
Dsub2 = Dsub2[I,,,,,,drop=FALSE]
TargetAnnotation = TargetAnnotation[I,]


###################################################
### code chunk number 18: SCNfeatureSelection3
###################################################
data(stabilitySelection, package="HD2013SGI")
Sel = stabilitySelection$ratioPositive >= 0.5


###################################################
### code chunk number 19: SCNfeatureSelection7
###################################################
D = D[,,,,stabilitySelection$selected[Sel],,drop=FALSE]
dimnames(D)[[1]] = TargetAnnotation$Symbol
dimnames(D)[[3]] = QueryAnnotation$Symbol
Dsub1 = Dsub1[,,,,stabilitySelection$selected[Sel],,drop=FALSE]
Dsub2 = Dsub2[,,,,stabilitySelection$selected[Sel],,drop=FALSE]
dimnames(Dsub1) = dimnames(D)
dimnames(Dsub2) = dimnames(D)


###################################################
### code chunk number 20: SCNpairwiseInteractionScores1
###################################################
getInteractions <- function(D) {
  pimatrix = datamatrix
  pimatrix$D[] = NA_real_
  mainEffects = list(target = D[,,1,,,],
                     query = D[1,,,,,],
                     overall = D[1,,1,,,],
                     Anno = datamatrix$Anno)

  for (i in 1:2) {
    for (j in 1:2) {
      for (k in 1:dim(D)[5]) {
        for (l in 1:2) {
          MP = HD2013SGImaineffects(D[,i,,j,k,l],
              TargetNeg=which(TargetAnnotation$group == "negctrl"))
          pimatrix$D[,i,,j,k,l] = MP$pi
          mainEffects$target[,i,j,k,l] = MP$targetMainEffect
          mainEffects$query[i,,j,k,l] = MP$queryMainEffect
          mainEffects$overall[i,j,k,l] = MP$neg
        }
      }
    }
  }
  D = pimatrix$D
  PADJ = D[pimatrix$Anno$target$group == "sample",,,,,1]
  s = rep(NA_real_, dim(D)[5])
  for (i in 1:dim(D)[5]) {
    Data = D[,,,,i,]
    Data = Data[pimatrix$Anno$target$group == "sample",,,,]
    d = dim(Data)
    dim(Data) = c(prod(d[1:4]),prod(d[5]))
    Data[abs(Data[,1]-Data[,2]) >
           4*mad(Data[,1]-Data[,2],center=0.0),] = NA_real_

    s[i] = median(apply(Data,1,sd), na.rm=TRUE)
    padj = rep(NA_real_, nrow(Data))
    K = which(apply(!is.na(Data),1,all))
    fit = eBayes(lmFit(Data[K,]))
    padj[K] = p.adjust(fit$p.value, method="BH")
    PADJ[,,,,i] = padj
    cat("i=",i," nr int (1%) = ",sum(padj <= 0.01,na.rm=TRUE)/nrow(Data),
        " nr int (3%) = ",sum(padj <= 0.03,na.rm=TRUE)/nrow(Data),"\n")
  }
  PI = pimatrix$D
  PI = PI[pimatrix$Anno$target$group == "sample",,,,,]
  Interactions = list(piscore = PI,
                      scale = s,
                      padj = PADJ,
                      Anno = pimatrix$Anno)
  Interactions$Anno$target = Interactions$Anno$target[
              pimatrix$Anno$target$group == "sample",]
  Interactions
}

InteractionsSub4 = getInteractions(D)
InteractionsSub1 = getInteractions(Dsub1)
InteractionsSub2 = getInteractions(Dsub2)


###################################################
### code chunk number 21: SCNnumbers
###################################################
field1 = sum(InteractionsSub1$padj <= 0.01,na.rm=TRUE)
field2 = sum(InteractionsSub2$padj <= 0.01,na.rm=TRUE)
field4 = sum(InteractionsSub4$padj <= 0.01,na.rm=TRUE)


###################################################
### code chunk number 22: SCNnumbers
###################################################
pdf(file.path("result","Figures","subsamplingNrInteractions.pdf"),
    height=5,width=5)


###################################################
### code chunk number 23: SCNnumbers
###################################################
plot(c(field1,field2,field4),type="b",pch=19,
     ylab="number of interactions",xlab="",
     xlim=c(0.5,3.5),ylim=c(0.0,field4),
     main="",xaxt="n",cex.lab=1.75,cex.axis=1.5)
axis(side=1,at=1:3,labels=c("","",""))
axis(side=1,at=1:3,labels=c("1775\ncells","3550\ncells","7100\ncells"),
     line=1.5,lwd=0,cex.axis=1.5)


###################################################
### code chunk number 24: SCNnumbers
###################################################
dev.off()


