samples.per.cluster,
compute.oob.predictions,
num.threads,
seed)
class(forest) <- c("regression_forest", "grf")
forest[["ci.group.size"]] <- ci.group.size
forest[["X.orig"]] <- X
forest[["Y.orig"]] <- Y
forest[["W.orig"]] <- W
forest[["Z.orig"]] <- Z
forest[["Y.hat"]] <- Y.hat
forest[["W.hat"]] <- W.hat
forest[["Z.hat"]] <- Z.hat
forest[["sample.weights"]] <- sample.weights
forest[["clusters"]] <- clusters
forest
}
#' Predict with a two-stage-least-squares forest
#'
#' @method predict regression_forest
#' @export
predict.regression_forest <- function(object, newdata = NULL,
num.threads = NULL,
estimate.variance = TRUE,
...) {
# If possible, use pre-computed predictions.
if (is.null(newdata) & !estimate.variance & !is.null(object$predictions)) {
return(data.frame(predictions=object$predictions,
debiased.error=object$debiased.error,
excess.error=object$excess.error))
}
num.threads = validate_num_threads(num.threads)
forest.short = object[-which(names(object) == "X.orig")]
X = object[["X.orig"]]
Y.centered = object[["Y.orig"]] - object[["Y.hat"]]
W.centered = object[["W.orig"]] - object[["W.hat"]]
Z.centered = object[["Z.orig"]] - object[["Z.hat"]]
train.data <- create_data_matrices(X, Y.centered, W.centered, Z.centered)
outcome.index = ncol(X) + 1
treatment.index <- ncol(X) + 2
instrument.index <- seq(from = ncol(X) + 3, to = ncol(X) + 3 + ncol(Z)-1 ,by = 1)
if (!is.null(newdata) ) {
validate_newdata(newdata, object$X.orig)
data <- create_data_matrices(newdata)
ret = regression_predict(forest.short, train.data$default, train.data$sparse, outcome.index,
treatment.index, instrument.index, data$default, data$sparse,
num.threads, estimate.variance)
} else {
ret = regression_predict_oob(forest.short, train.data$default, train.data$sparse, outcome.index,
treatment.index, instrument.index, num.threads, estimate.variance)
}
# Convert list to data frame.
empty = sapply(ret, function(elem) length(elem) == 0)
do.call(cbind.data.frame, ret[!empty])
}
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))}
Z = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.hat[,i] = predict(forest.Z)$predictions
i = i + 1
}
}
else if (length(Z.hat) == 1) {
Z.hat  <- matrix(rep(Z.hat, nrow(X)*ncol(Z)), nrow=nrow(X), ncol=ncol(Z))
} else if (is.vector(Z.hat)) {
if(length(Z.hat != ncol(Z))) {
stop("Z.hat has not the same number of columns as Z")
} else {
Z.hat  <- matrix(rep(Z.hat, nrow(X)), nrow=nrow(X), ncol = ncol(Z), byrow = TRUE)
}
} else if (ncol(Z.hat) != ncol(Z) | nrow(Z.hat)!=nrow(X)) {
stop("Z has the wrong number of rows and/or columns")
}
data <- create_data_matrices(X, Y - Y.hat, W - W.hat, Z - Z.hat, sample.weights = sample.weights)
outcome.index <- ncol(X) + 1
treatment.index <- ncol(X) + 2
instrument.index <- seq(from = ncol(X) + 3, to = ncol(X) + 3 + ncol(Z)-1 ,by = 1)
sample.weight.index <- ncol(X) + 4
instrument.index <- instrument.index - 1 # C++ format!
forest <- tsls_train(data$default, data$sparse,
outcome.index, treatment.index, instrument.index, sample.weight.index,
!is.null(sample.weights),
as.numeric(mtry),
num.trees,
as.numeric(min.node.size),
as.numeric(sample.fraction),
honesty,
coerce_honesty_fraction(honesty.fraction),
ci.group.size,
as.numeric(alpha),
as.numeric(imbalance.penalty),
clusters,
samples.per.cluster,
compute.oob.predictions,
num.threads,
seed)
class(forest) <- c("regression_forest", "grf")
forest[["ci.group.size"]] <- ci.group.size
forest[["X.orig"]] <- X
forest[["Y.orig"]] <- Y
forest[["W.orig"]] <- W
forest[["Z.orig"]] <- Z
forest[["Y.hat"]] <- Y.hat
forest[["W.hat"]] <- W.hat
forest[["Z.hat"]] <- Z.hat
forest[["sample.weights"]] <- sample.weights
forest[["clusters"]] <- clusters
forest
}
#' Predict with a two-stage-least-squares forest
#'
#' @method predict regression_forest
#' @export
predict.regression_forest <- function(object, newdata = NULL,
num.threads = NULL,
estimate.variance = TRUE,
...) {
# If possible, use pre-computed predictions.
if (is.null(newdata) & !estimate.variance & !is.null(object$predictions)) {
return(data.frame(predictions=object$predictions,
debiased.error=object$debiased.error,
excess.error=object$excess.error))
}
num.threads = validate_num_threads(num.threads)
forest.short = object[-which(names(object) == "X.orig")]
X = object[["X.orig"]]
Y.centered = object[["Y.orig"]] - object[["Y.hat"]]
W.centered = object[["W.orig"]] - object[["W.hat"]]
Z.centered = object[["Z.orig"]] - object[["Z.hat"]]
train.data <- create_data_matrices(X, Y.centered, W.centered, Z.centered)
outcome.index = ncol(X) + 1
treatment.index <- ncol(X) + 2
instrument.index <- seq(from = ncol(X) + 3, to = ncol(X) + 3 + ncol(Z)-1 ,by = 1)
if (!is.null(newdata) ) {
validate_newdata(newdata, object$X.orig)
data <- create_data_matrices(newdata)
ret = regression_predict(forest.short, train.data$default, train.data$sparse, outcome.index,
treatment.index, instrument.index, data$default, data$sparse,
num.threads, estimate.variance)
} else {
ret = regression_predict_oob(forest.short, train.data$default, train.data$sparse, outcome.index,
treatment.index, instrument.index, num.threads, estimate.variance)
}
# Convert list to data frame.
empty = sapply(ret, function(elem) length(elem) == 0)
do.call(cbind.data.frame, ret[!empty])
}
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))})
Z = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.hat[,i] = predict(forest.Z)$predictions
i = i + 1
}
}
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))})
Z = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
}
library(grftsls)
n <- 2000
p <- 10
X <- matrix(rnorm(n * p), n, p)
W <- rbinom(n, 1, 0.4 + 0.2 * (X[, 1] > 0))
Y <- pmax(X[, 1], 0) * W + X[, 2] + pmin(X[, 3], 0) + rnorm(n)
Z <- cbind(W*0.4 + rnorm(n), W*0.4 + rnorm(n), W*0.2 + rnorm(n), W*0.9 + rnorm(n), W*0.234 + rnorm(n))
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))})
Z = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
}
W.hat
forest.Z.list[[1]]
forest.Z.list[1]
View(forest.Z.list)
forest.Z.list[[1]]
is.list(forest.Z.list[[1]])
is.double(forest.Z.list[[1]])
forest.Z.list[[1]]
for(forest in forest.Z.list) {
forest
}
for(forest in forest.Z.list) {
print(forest)
}
for(forest in forest.Z.list) {
forest[[1]]
}
for(forest in forest.Z.list) {
print(forest[[1]])
}
for(forest in forest.Z.list) {
print(forest)
}
i=0
for(forest in forest.Z.list) {
i = i+ 1
print(forest[[i]])
}
i=0
for(forest in forest.Z.list) {
i = i+ 1
print(forest[i])
}
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))})
Z = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
}
Z
library(grftsls)
n <- 2000
p <- 10
X <- matrix(rnorm(n * p), n, p)
W <- rbinom(n, 1, 0.4 + 0.2 * (X[, 1] > 0))
Y <- pmax(X[, 1], 0) * W + X[, 2] + pmin(X[, 3], 0) + rnorm(n)
Z <- cbind(W*0.4 + rnorm(n), W*0.4 + rnorm(n), W*0.2 + rnorm(n), W*0.9 + rnorm(n), W*0.234 + rnorm(n))
Z
Z.hat = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
if (is.null(Z.hat)) {
forest.Z.list <- lapply(seq_len(ncol(Z)),function(i) {do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))})
Z.hat = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
}
Z.hat
Z.hat = matrix(nrow = nrow(X), ncol = ncol(Z))
i = 1
for(forest in forest.Z.list) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(forest)$predictions
i = i + 1
}
Z.hat = matrix(nrow = nrow(X), ncol = ncol(Z))
for(i in seq_len(ncol(Z))) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(Z.forest)$predictions
i = i + 1
}
Z.hat = matrix(nrow = nrow(X), ncol = ncol(Z))
for(i in seq_len(ncol(Z))) {
Z.forest <- do.call(regression_forest, c(Y = list(Z[,i]), args.orthog))
Z.hat[,i] = predict(Z.forest)$predictions
}
forest.W <- do.call(regression_forest, c(Y = list(W), args.orthog))
W.hat <- predict(forest.W)$predictions
library(Rcpp)
library(devtools)
library(testthat)
library(roxygen2)
package.name <- "grftsls"
package.src <- "grftsls/src"
# Copy Rcpp bindings and C++ source into the package src directory. Note that we
# don't copy in third_party/Eigen, because for the R package build we provide
# access to the library through RcppEigen.
unlink(package.src, recursive = TRUE)
dir.create(package.src)
binding.files <- list.files("grftsls/bindings", full.names = TRUE)
file.copy(binding.files, package.src, recursive = FALSE)
file.copy("../core/src", package.src, recursive = TRUE)
file.copy("../core/third_party/optional", package.src, recursive = TRUE)
# Auto-generate documentation files
roxygen2::roxygenise(package.name)
library(grf)
matrix(runif(10 * 2), 10, 2)
library(Rcpp)
library(devtools)
library(testthat)
library(roxygen2)
package.name <- "grftsls"
package.src <- "grftsls/src"
# Copy Rcpp bindings and C++ source into the package src directory. Note that we
# don't copy in third_party/Eigen, because for the R package build we provide
# access to the library through RcppEigen.
unlink(package.src, recursive = TRUE)
dir.create(package.src)
fit.draws = matrix(runif(num.fit.reps * num.params), num.fit.reps, num.params)
num.fit.reps = 100
num.params = 5
fit.draws = matrix(runif(num.fit.reps * num.params), num.fit.reps, num.params)
View(fit.draws)
floor(2^(draws[, param] * (log(nrow(X)) / log(2) - 4))
floor(2^(draws[, param] * (log(nrow(X)) / log(2) - 4)))
floor(2^(draws[, 1] * (log(nrow(X)) / log(2) - 4)))
fit.draws[1,]
fit.draws[1,] <-  draws
draws <- fit.draws[1,]
floor(2^(draws[, 1] * (log(nrow(X)) / log(2) - 4)))
draws = rbind(c(draws))
floor(2^(draws[, 1] * (log(nrow(X)) / log(2) - 4)))
floor(2^(draws[, 1] * (log(nrow(2000)) / log(2) - 4)))
floor(2^(draws[, 1] * (log(2000)) / log(2) - 4)))
floor(2^(draws[, 1] * (log(2000) / log(2) - 4)))
floor(2^(draws[, 2] * (log(2000) / log(2) - 4)))
floor(2^(draws[, 3] * (log(2000) / log(2) - 4)))
2^(draws[, 3] * (log(2000) / log(2) - 4)
)
2^(draws[, 3] * (log(2000) / log(2) - 4))
floor(2^(1 * (log(2000) / log(2) - 4)))
floor(2^(4 * (log(2000) / log(2) - 4)))
floor(2^(2 * (log(2000) / log(2) - 4)))
runif()
runif(2)
runif(100)
max(runif(100000))
floor(2^(0.0001 * (log(2000) / log(2) - 4)))
floor(2^(0.0001 * (log(200000) / log(2) - 4)))
floor(2^(0.001 * (log(200000) / log(2) - 4)))
floor(2^(0.01 * (log(200000) / log(2) - 4)))
floor(2^(0.03 * (log(200000) / log(2) - 4)))
floor(2^(0.1 * (log(200000) / log(2) - 4)))
floor(2^(0.3 * (log(200000) / log(2) - 4)))
floor(2^(0.4 * (log(200000) / log(2) - 4)))
floor(2^(0.5 * (log(200000) / log(2) - 4)))
floor(2^(0.9 * (log(200000) / log(2) - 4)))
floor(2^(0.95 * (log(200000) / log(2) - 4)))
floor(2^(0.7 * (log(200000) / log(2) - 4)))
floor(2^(0.1 * (log(200000) / log(2) - 4)))
floor(2^(0.2 * (log(200000) / log(2) - 4)))
floor(2^(0.2 * (log(250000) / log(2) - 4)))
log(250000)
floor(2^(1 * (log(250000) / log(2) - 4)))
(log(250000) / log(2) - 4)
min = 2
floor(2^(1 * (log(250000) / log(2) - 4)))+min
floor(2^(0-001 * (log(250000) / log(2) - 4)))+min
floor(2^(0.001 * (log(250000) / log(2) - 4)))+min
0.05 + 0.45 * draws[, 1])
0.05 + 0.45 * draws[, 1]
0.05 + 0.45 * 1
0.05 + 0.45 * 0.5
0.05 + 0.45 * 0.8
max = 0.5
0.05 + (max - 0.05) * 0.8
fit.draws
debiased.errors = apply(fit.draws, 1, function(draw) {
debiased.errors = apply(fit.draws, 1, function(draw) {
params = c(fixed.params, get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
debiased.errors = apply(fit.draws, 1, function(draw) {
params = c(fixed.params, get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
))
debiased.errors = apply(fit.draws, 1, function(draw) {
params = c(fixed.params, get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
fixed.params
debiased.errors = apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
get_params_from_draw <- function(X, draws, max.sample.fraction = NULL, min.min.node.size = NULL) {
if (is.vector(draws))
draws = rbind(c(draws))
n = nrow(draws)
vapply(colnames(draws), function(param) {
if (param == "min.node.size")
return(floor(2^(draws[, param] * (log(nrow(X)) / log(2) - 4))) + min.min.node.size)
else if (param == "sample.fraction")
return(0.05 + (max.sample.fraction - 0.05) * draws[, param])
else if (param == "mtry")
return(ceiling(min(ncol(X), sqrt(ncol(X)) + 20) * draws[, param]))
else if (param == "alpha")
return(draws[, param] / 4)
else if (param == "imbalance.penalty")
return (-log(draws[, param]))
else
stop("Unrecognized parameter name provided: ", param)
}, FUN.VALUE = numeric(n))
}
debiased.errors = apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
max.sample.fraction = 0.5
min.min.node.size = 20
debiased.errors = apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1))
})
debiased.errors = apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
})
debiased.errors
fit.draws = matrix(runif(num.fit.reps * num.params), num.fit.reps, num.params)
fit.draws
draws <- fit.draws[1,]
draws = rbind(c(draws))
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
draw <- fit.draws[1,]
draw = rbind(c(draws))
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
colnames(fit.draws) = c( "min.node.size", "sample.fraction", "mtry", "alpha")
colnames(fit.draws) = c( "min.node.size", "sample.fraction", "mtry", "alpha", "imbalance.penalty")
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
colnames(fit.draws) = c( "min.node.size", "sample.fraction", "mtry", "alpha", "imbalance.penalty")
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
apply(fit.draws, 1, function(draw) {
draw
#get_params_from_draw(X, draw,
#                     max.sample.fraction = max.sample.fraction,
#                     min.min.node.size = min.min.node.size-1)
})
apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
})
X <- matrix(rnorm(10000 * 10), 10000, 10)
apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
})
a <- apply(fit.draws, 1, function(draw) {
get_params_from_draw(X, draw,
max.sample.fraction = max.sample.fraction,
min.min.node.size = min.min.node.size-1)
})
a[, 1]
a[1, ]
min(a[1, ])
library(grftsls)
draws = runiform(1000)
max(min.mtry,ceiling(ncol(X), sqrt(ncol(X)) + 20) * draws))
draws = uniform(1000)
draws = runif(1000)
max(min.mtry,ceiling(ncol(X), sqrt(ncol(X)) + 20) * draws))
draws = runif(1000)
max(min.mtry,ceiling((ncol(X), sqrt(ncol(X)) + 20) * draws[, param]))
max(min.mtry,ceiling(min(ncol(X), sqrt(ncol(X)) + 20) * draws[, param]))
max(4,ceiling(min(ncol(X), sqrt(ncol(X)) + 20) * draws[, param]))
draws = runif(1000)
max(4,ceiling(min(7, sqrt(7) + 20) * draws[, param]))
max(4,ceiling(min(7, sqrt(7) + 20) * draws))
draws = runif(1000)
max(4,ceiling(min(7, sqrt(7) + 20) * draws))
draws = runif(1000)
max(4,ceiling(min(7, sqrt(7) + 20) * draws))
draws = runif(1000)
max(4,ceiling(min(7, sqrt(7) + 20) * draws))
draws
ceiling(min(7, sqrt(7) + 20) * draws)
draws = runif(1000)
ceiling(min(ncol.X, sqrt(ncol.X) + 20) * draws) + min.mtry
draws = runif(1000)
ceiling(min(ncol(X), sqrt(ncol(X)) + 20) * draws) + min.mtry
draws = runif(1000)
ceiling(min(7, sqrt(7) + 20) * draws) + 5
draws = runif(1000)
max(ceiling(min(7, sqrt(7) + 20) * draws) + rep(5,1000))
draws = runif(1000)
max(ceiling(min(7, sqrt(7) + 20) * draws), rep(5,1000))
pmax(ceiling(min(7, sqrt(7) + 20) * draws), rep(5,1000))
pmax(ceiling(min(7, sqrt(7) + 20) * draws),5)
?pmax
a <- seq(1,1,1000)
a
a <- seq(1,1000,1)
a
pmax(a, 500
)
install.packages("E:/_Philipp/grftsls_0.10.3.tar.gz", repos = NULL, type="source")
install.packages("installR")
install.packages("installr")
library(installr)
