% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/machinelearning-functions-knntl.R
\name{knntlOptimisation}
\alias{knntlOptimisation}
\title{theta parameter optimisation}
\usage{
knntlOptimisation(
  primary,
  auxiliary,
  fcol = "markers",
  k,
  times = 50,
  test.size = 0.2,
  xval = 5,
  by = 0.5,
  length.out,
  th,
  xfolds,
  BPPARAM = BiocParallel::bpparam(),
  method = "Breckels",
  log = FALSE,
  seed
)
}
\arguments{
\item{primary}{An instance of class \code{"\linkS4class{MSnSet}"}.}

\item{auxiliary}{An instance of class
\code{"\linkS4class{MSnSet}"}.}

\item{fcol}{The feature meta-data containing marker definitions.
Default is \code{markers}.}

\item{k}{Numeric vector of length 2, containing the best \code{k}
parameters to use for the primary (\code{k[1]}) and auxiliary
(\code{k[2]}) datasets. See \code{knnOptimisation} for
generating best \code{k}.}

\item{times}{The number of times cross-validation is
performed. Default is 50.}

\item{test.size}{The size of test (validation) data. Default is
0.2 (20 percent).}

\item{xval}{The number of rounds of cross-validation to perform.
Default is 5.}

\item{by}{The increment for theta, must be one of \code{c(1, 0.5,
0.25, 0.2, 0.15, 0.1, 0.05)}. Default is 0.5.}

\item{length.out}{Alternative to using \code{by}
parameter. Specifies the desired length of the sequence of
theta to test.}

\item{th}{A matrix of theta values to test for each class as
generated from the function \code{\link{thetas}}, the number
of columns should be equal to the number of classes contained
in \code{fcol}. Note: columns will be ordered according to
\code{getMarkerClasses(primary, fcol)}. This argument is only
valid if the default method 'Breckels' is used.}

\item{xfolds}{Option to pass specific folds for the cross
validation.}

\item{BPPARAM}{Required for parallelisation. If not specified
selects a default \code{BiocParallelParam}, from global
options or, if that fails, the most recently registered()
back-end.}

\item{method}{The k-NN transfer learning method to use. The
default is 'Breckels', as described in Breckels et al.
(2016). If 'Wu' is specified then the original method
of Wu and Dietterich (2004) is used.}

\item{log}{A \code{logical} defining whether logging should be
enabled. Default is \code{FALSE}. Note that logging produces
considerably bigger objects.}

\item{seed}{The optional random number generator seed.}
}
\value{
A list containing the theta combinations tested and the
    associated macro F1 score and accuracy for each combination
    over each round (specified by \code{times}).
}
\description{
Classification parameter optimisation for the KNN implementation
of Wu and Dietterich's transfer learning schema
}
\details{
\code{knntlOptimisation} implements a variation of Wu and
Dietterich's transfer learning schema: P. Wu and
T. G. Dietterich. Improving SVM accuracy by training on auxiliary
data sources. In Proceedings of the Twenty-First International
Conference on Machine Learning, pages 871 - 878.  Morgan Kaufmann,
2004. A grid search for the best theta is performed.
}
\examples{
## Load example primary and auxiliary data from pRolocdata
library(pRolocdata)
data(andy2011)
data(andy2011goCC)

## reducing calculation time of k by pre-running knnOptimisation
x <- c(andy2011, andy2011goCC)
k <- lapply(x, function(z)
            knnOptimisation(z, times=5,
                            fcol = "markers.orig",
                            verbose = FALSE))
k <- sapply(k, function(z) getParams(z))

## Use by = 1 in the optimisation, i.e. give full weight to either
## the primary (indicated by 1) or the auxiliary (indicated by 0)
## data. This reduces the parameter search and is done for this
## documentation example only. See the transfer learning vignette
## for examples and details.
opt <- knntlOptimisation(andy2011, andy2011goCC,
                         fcol = "markers.orig",
                         times = 2,
                         by = 1, 
                         k = k)
th <- getParams(opt)
plot(opt)

## Now perform classification after finding the best weights
res <- knntlClassification(andy2011, andy2011goCC,
                           fcol = "markers.orig", 
                           th, 
                           k)
}
\references{
Breckels LM, Holden S, Wojnar D, Mulvey CM,
    Christoforou A, Groen AJ, Kohlbacher O, Lilley KS, Gatto L.
    Learning from heterogeneous data sources: an application in
    spatial proteomics. bioRxiv. doi:
    \url{http://dx.doi.org/10.1101/022152}

Wu P, Dietterich TG. Improving SVM Accuracy by Training on Auxiliary
Data Sources. Proceedings of the 21st International Conference on Machine
Learning (ICML); 2004.
}
\seealso{
\code{\link{knntlClassification}} and example therein.
}
\author{
Lisa Breckels
}
