#' Start and stop \pkg{basilisk}-related processes
#'
#' Creates a \pkg{basilisk} process in which Python operations (via \pkg{reticulate}) 
#' can be safely performed with the correct versions of Python packages.
#'
#' @param env A \linkS4class{BasiliskEnvironment} object specifying the \pkg{basilisk} environment to use.
#' 
#' Alternatively, a string specifying the path to an environment, though this should only be used for testing purposes.
#' @param full.activation Deprecated and ignored.
#' @param proc A process object generated by \code{basiliskStart}.
#' @param fork Logical scalar indicating whether forking should be performed on non-Windows systems, see \code{\link{getBasiliskFork}}.
#' If \code{FALSE}, a new worker process is created using communication over sockets.
#' @param shared Logical scalar indicating whether \code{basiliskStart} is allowed 
#' to load a shared Python instance into the current R process, see \code{\link{getBasiliskShared}}.
#' @param fun A function to be executed in the \pkg{basilisk} process, see \dQuote{Constraints on user-defined functions}.
#' @param persist Logical scalar indicating whether to pass a persistent store to \code{fun}.
#' If \code{TRUE}, \code{fun} should accept a \code{store} argument.
#' @param testload Deprecated and ignored.
#' @param ... Further arguments to be passed to \code{fun}.
#'
#' @return 
#' \code{basiliskStart} returns a process object, the exact nature of which depends on \code{fork} and \code{shared}.
#' This object should only be used in \code{basiliskRun} and \code{basiliskStop}.
#'
#' \code{basiliskRun} returns the output of \code{fun(...)}, possibly executed inside the separate process.
#'
#' \code{basiliskStop} stops the process in \code{proc}.
#' 
#' @details
#' These functions ensure that any Python operations in \code{fun} will use the environment specified by \code{env}.
#' This avoids version conflicts in the presence of other Python instances or environments loaded by other packages or by the user.
#' Thus, \pkg{basilisk} clients are not affected by (and if \code{shared=FALSE}, do not affect) the activity of other R packages.
#' 
#' It is good practice to call \code{basiliskStop} once computation is finished to terminate the process.
#' Any Python-related operations between \code{basiliskStart} and \code{basiliskStop} should only occur via \code{basiliskRun}.
#' Calling \pkg{reticulate} functions directly will have unpredictable consequences.
#' Similarly, it would be unwise to interact with \code{proc} via any function other than the ones listed here.
#'
#' If \code{proc=NULL} in \code{basiliskRun}, a process will be created and closed automatically.
#' This may be convenient in functions where persistence is not required.
#' Note that doing so requires specification of \code{env}.
#' 
#' @section Choice of process type:
#' \itemize{
#' \item If \code{shared=TRUE} and no Python version has already been loaded, 
#' \code{basiliskStart} will load Python directly into the R session from the specified environment.
#' Similarly, if the existing environment is the same as the requested environment, \code{basiliskStart} will use that directly.
#' This mode is most efficient as it avoids creating any new processes, 
#' but the use of a shared Python configuration may prevent non-\pkg{basilisk} packages from working correctly in the same session.
#' \item If \code{fork=TRUE}, we are not on Windows, and either no Python version has already been loaded or the loaded environment is the same as the requested environment,
#' \code{basiliskStart} will create a new process by forking.
#' In the forked process, \code{basiliskStart} will load the specified environment for operations in Python.
#' This is less efficient as it needs to create a new process 
#' but it avoids forcing a Python configuration on other packages in the same R session.
#' \item Otherwise, \code{basiliskStart} will create a parallel socket process containing a separate R session.
#' In the new process, \code{basiliskStart} will load the specified environment for Python operations.
#' This is the least efficient as it needs to transfer data over sockets but is guaranteed to work.
#' }
#'
#' Developers can control these choices directly by explicitly specifying \code{shared} and \code{fork},
#' while users can control them indirectly with \code{\link{setBasiliskFork}} and related functions.
#'
#' @section Constraints on user-defined functions:
#' In \code{basiliskRun}, there is no guarantee that \code{fun} has access to \code{basiliskRun}'s calling environment.
#' This has several consequences for code in the body of \code{fun}:
#' \itemize{
#' \item Variables used inside \code{fun} should be explicitly passed as an argument to \code{fun}.
#' Developers should not rely on closures to capture variables in the calling environment of \code{basiliskRun}.
#' \item Developers should \emph{not} attempt to pass complex objects in or out of \code{fun}.
#' This mostly refers to objects that contain custom pointers to memory, e.g., file handles, pointers to \pkg{reticulate} objects.
#' Both the arguments and return values of \code{fun} should be pure R objects.
#' \item Functions or variables from non-base R packages should be prefixed with the package name via \code{::}, or those packages should be reloaded inside \code{fun}.
#' }
#'
#' Developers can test that their function behaves correctly in \code{basiliskRun} by setting \code{\link{setBasiliskShared}} and \code{\link{setBasiliskFork}} to \code{FALSE}.
#' This forces the execution of \code{fun} in a new process; any incorrect assumption of shared environments will cause errors.
#'
#' @section Persisting objects across calls:
#' Objects created inside \code{fun} can be persisted across calls to \code{basiliskRun} by setting \code{persist=TRUE}.
#' This will instruct \code{basiliskRun} to pass a \code{store} argument to \code{fun} that can be used to store arbitrary objects.
#' Those same objects can be retrieved from \code{store} in later calls to \code{basiliskRun} using the same \code{proc}.
#' Any object can be stored in \code{store} but will remain strictly internal to \code{proc}.
#'
#' This capability is primarily useful when a Python workflow is split across multiple \code{basiliskRun} calls.
#' Each subsequent call can pick up from temporary intermediate objects generated by the previous call.
#' In this manner, \pkg{basilisk} enables modular function design where developers can easily mix and match different \code{basiliskRun} invocations.
#' See Examples for a working demonstration.
#'
#' @section Use of lazy installation:
#' If the specified \pkg{basilisk} environment is not present and \code{env} is a \linkS4class{BasiliskEnvironment} object, the environment will be created upon first use of \code{basiliskStart}.
#' These environments are created in an external user-writable directory defined by \code{\link{getExternalDir}}.
#' The location of this directory can be changed by setting the \code{BASILISK_EXTERNAL_DIR} environment variable to the desired path.
#' This may occasionally be necessary in rare cases, e.g., if the file path to the default location is too long for Windows.
#' 
#' Advanced users may consider setting the environment variable \code{BASILISK_USE_SYSTEM_DIR} to 1 when installing \pkg{basilisk} client packages from source.
#' This will place each package's Python environments in the R system directory, which simplifies permission management and avoids duplication in enterprise settings.
#'
#' @author Aaron Lun
#'
#' @seealso
#' \code{\link{setupBasiliskEnv}}, to set up the Python environments.
#'
#' \code{\link{getBasiliskFork}} and \code{\link{getBasiliskShared}}, to control various global options.
#' 
#' @examples
#' tmploc <- file.path(tempdir(), "my_package_A")
#' if (!file.exists(tmploc)) {
#'     setupBasiliskEnv(tmploc, c('pandas=2.2.3'))
#' }
#' 
#' # Pulling out the pandas version, as a demonstration:
#' cl <- basiliskStart(tmploc) 
#' basiliskRun(proc=cl, function() { 
#'     X <- reticulate::import("pandas"); X$`__version__` 
#' })
#' basiliskStop(cl)
#' 
#' # This happily co-exists with our other environment:
#' tmploc2 <- file.path(tempdir(), "my_package_B")
#' if (!file.exists(tmploc2)) {
#'     setupBasiliskEnv(tmploc2, c('pandas=2.2.2'))
#' }
#' 
#' cl2 <- basiliskStart(tmploc2) 
#' basiliskRun(proc=cl2, function() { 
#'     X <- reticulate::import("pandas"); X$`__version__` 
#' })
#' basiliskStop(cl2)
#' 
#' # Persistence of variables is possible within a Start/Stop pair.
#' cl <- basiliskStart(tmploc)
#' basiliskRun(proc=cl, function(store) {
#'     store$snake.in.my.shoes <- 1
#'     invisible(NULL)
#' }, persist=TRUE)
#' basiliskRun(proc=cl, function(store) {
#'     return(store$snake.in.my.shoes)
#' }, persist=TRUE)
#' basiliskStop(cl)
#'
#' @export
#' @importFrom parallel makePSOCKcluster clusterCall makeForkCluster
#' @importFrom reticulate py_config py_available
basiliskStart <- function(env, full.activation=NA, fork=getBasiliskFork(), shared=getBasiliskShared(), testload=NULL) {
    envpath <- obtainEnvironmentPath(env)

    if (shared) {
        # In shared mode, the "process" is just an environment in the current
        # R session; basiliskRun and basiliskStop recognize it by its type.
        proc <- new.env()

        ok <- FALSE
        if (py_available()) {
            # Python is already loaded into this session, so the shared mode
            # is only usable if .same_as_loaded() accepts 'envpath'.
            if (.same_as_loaded(envpath)) {
                ok <- TRUE
            }
        } else {
            # Keep the function returned by useBasiliskEnv so that
            # basiliskStop can call it later to undo the activation.
            proc$.basilisk.restore <- useBasiliskEnv(envpath, full.activation)
            ok <- TRUE
        }

        if (ok) {
            return(proc)
        }
    } 

    # Create a separate R process if the shared instance doesn't work.
    # Forking is only attempted off Windows, and only if this session has no
    # Python loaded or .same_as_loaded() accepts 'envpath'.
    if (fork && !isWindows() && (!py_available() || .same_as_loaded(envpath))) {
        proc <- makeForkCluster(1)
    } else {
        proc <- makePSOCKcluster(1)
    }

    # The worker exists from this point on. If configuring it fails, shut it
    # down before the error propagates so that no orphaned R process is left
    # behind; the caller never receives 'proc' and so cannot stop it itself.
    ready <- FALSE
    on.exit(if (!ready) basiliskStop(proc), add=TRUE)

    # No need to store the function returned by useBasiliskEnv; once we call
    # basiliskStop, we stop the process, so there's no point resetting the variables.
    clusterCall(proc, useBasiliskEnv, envpath=envpath, full.activation=full.activation)

    # Create the persistent store in the worker, for use by basiliskRun when persist=TRUE.
    clusterCall(proc, .instantiate_store)

    ready <- TRUE
    proc
}

.instantiate_store <- function() {
    # Bind a fresh, empty environment to '.basilisk.store' in the global
    # environment of whichever R session evaluates this function. basiliskRun
    # fetches it from there when persist=TRUE on a cluster-backed process.
    assign(x=".basilisk.store", value=new.env(), envir=globalenv())
}

#' @export
#' @rdname basiliskStart
#' @importFrom parallel stopCluster
basiliskStop <- function(proc) {
    if (is.environment(proc)) {
        # Shared mode: 'proc' is an environment in the current session.
        # If basiliskStart stashed a restore function in it, call that now.
        undo <- proc[[".basilisk.restore"]]
        if (!is.null(undo)) {
            undo()
        }
    } else {
        # Anything else is treated as a cluster object and shut down.
        stopCluster(proc)
    }
}

#' @export
#' @rdname basiliskStart
#' @importFrom parallel clusterCall
basiliskRun <- function(proc=NULL, fun, ..., env, full.activation=NA, persist=FALSE, fork=getBasiliskFork(), shared=getBasiliskShared(), testload=NULL) {
    # Without a user-supplied process, create one for this call only and make
    # sure that it is stopped again when we leave, even on error.
    if (is.null(proc)) {
        proc <- basiliskStart(env, full.activation=full.activation, fork=fork, shared=shared, testload=testload)
        on.exit(basiliskStop(proc), add=TRUE)
    }

    if (is.environment(proc)) {
        # Shared mode: run 'fun' directly in the current session.
        # The 'proc' environment itself doubles as the persistent store.
        if (persist) {
            output <- fun(..., store=proc)
        } else {
            output <- fun(...) 
        }
    } else {
        # Otherwise 'proc' is treated as a cluster. clusterCall returns one
        # result per node, and we only keep the first.
        if (persist) {
            wrapper <- function(.fun, ...) .fun(..., store=get(".basilisk.store", envir=.GlobalEnv)) 

            # The wrapper is serialized and sent to the worker along with its
            # enclosing environment. Left as-is, that would be this call frame,
            # dragging 'proc', a second copy of 'fun' and of the arguments in
            # '...', plus any unevaluated argument promises, over the socket.
            # The wrapper only needs 'get' and '.GlobalEnv', both found in base.
            environment(wrapper) <- baseenv()

            output <- clusterCall(proc, fun=wrapper, .fun=fun, ...)[[1]]
        } else {
            output <- clusterCall(proc, fun=fun, ...)[[1]]
        }
    } 

    output
}
