#' Read CmdStan Binary Output
#'
#' Reads binary output files produced by CmdStan with format=bin option.
#'
#' @param path Base path without extension (e.g., "output" reads output.bin + output.meta)
#' @return A matrix with named columns containing posterior samples
#' @export
#'
#' @examples
#' draws <- read_bin_stan("output")
#' draws[1:10, c("lp__", "theta.1")]
read_bin_stan <- function(path) {
meta_path <- paste0(path, ".meta")
bin_path <- paste0(path, ".bin")
if (!file.exists(meta_path)) {
stop("Metadata file not found: ", meta_path, call. = FALSE)
}
if (!file.exists(bin_path)) {
stop("Binary file not found: ", bin_path, call. = FALSE)
}
# Parse metadata
meta_lines <- readLines(meta_path, warn = FALSE)
meta <- list()
for (line in meta_lines) {
if (nchar(trimws(line)) == 0) {
next
}
if (!grepl("=", line, fixed = TRUE)) {
next
}
eq_pos <- regexpr("=", line, fixed = TRUE)[1]
key <- substr(line, 1, eq_pos - 1)
value <- substr(line, eq_pos + 1, nchar(line))
meta[[key]] <- value
}
# Validate version
version <- as.integer(meta[["version"]])
if (is.na(version)) {
stop("Invalid metadata: missing version field", call. = FALSE)
}
if (version > 1) {
stop(
"Unsupported binary format version: ",
version,
". Please update your reader.",
call. = FALSE
)
}
# Get dimensions
nrow <- as.integer(meta[["rows"]])
ncol <- as.integer(meta[["cols"]])
if (is.na(nrow) || is.na(ncol)) {
stop("Invalid metadata: missing or invalid rows/cols", call. = FALSE)
}
if (nrow == 0) {
warning("Binary file contains 0 rows")
mat <- matrix(numeric(0), nrow = 0, ncol = ncol)
return(mat)
}
# Verify endianness (default to little if not specified)
endian <- meta[["endian"]]
if (is.null(endian)) {
endian <- "little"
}
if (!endian %in% c("little", "big")) {
stop("Invalid endian specification: ", endian, call. = FALSE)
}
# Read binary data
expected_size <- nrow * ncol * 8
actual_size <- file.info(bin_path)$size
if (actual_size < expected_size) {
warning(sprintf(
"Binary file smaller than expected. Expected %d bytes for %d x %d matrix, got %d bytes. File may be truncated.",
expected_size,
nrow,
ncol,
actual_size
))
# Adjust nrow to what we can actually read
nrow <- floor(actual_size / (ncol * 8))
}
con <- file(bin_path, "rb")
on.exit(close(con), add = TRUE)
data <- readBin(
con,
what = "double",
n = nrow * ncol,
size = 8,
endian = endian
)
if (length(data) != nrow * ncol) {
stop(
sprintf(
"Data size mismatch: expected %d values, got %d",
nrow * ncol,
length(data)
),
call. = FALSE
)
}
# Reshape to matrix (R is column-major, data is row-major)
mat <- matrix(data, nrow = nrow, ncol = ncol, byrow = TRUE)
# Extract and apply column names
name_keys <- grep("^name\\.[0-9]+$", names(meta), value = TRUE)
if (length(name_keys) == ncol) {
ordered_names <- character(ncol)
for (key in name_keys) {
idx <- as.integer(sub("^name\\.", "", key)) + 1L
if (idx >= 1 && idx <= ncol) {
ordered_names[idx] <- meta[[key]]
}
}
colnames(mat) <- ordered_names
} else if (length(name_keys) > 0) {
warning(sprintf(
"Column name count mismatch: %d names for %d columns",
length(name_keys),
ncol
))
}
mat
}
#' Read Multiple Chains from CmdStan Binary Output
#'
#' @param base_pattern Base path pattern (e.g., "output" matches output_1, output_2, ...)
#' @param chains Number of chains to read
#' @return List of matrices, one per chain
#' @export
read_bin_stan_chains <- function(base_pattern, chains) {
result <- vector("list", chains)
for (i in seq_len(chains)) {
chain_path <- paste0(base_pattern, "_", i)
if (!file.exists(paste0(chain_path, ".meta"))) {
stop("Chain ", i, " not found: ", chain_path, ".meta", call. = FALSE)
}
result[[i]] <- read_bin_stan(chain_path)
}
names(result) <- paste0("chain_", seq_len(chains))
result
}
#' Get Metadata from CmdStan Binary Output
#'
#' @param path Base path without extension
#' @return List containing metadata fields
#' @export
read_bin_stan_meta <- function(path) {
meta_path <- paste0(path, ".meta")
if (!file.exists(meta_path)) {
stop("Metadata file not found: ", meta_path, call. = FALSE)
}
meta_lines <- readLines(meta_path, warn = FALSE)
meta <- list()
names_list <- list()
for (line in meta_lines) {
if (nchar(trimws(line)) == 0) {
next
}
if (!grepl("=", line, fixed = TRUE)) {
next
}
eq_pos <- regexpr("=", line, fixed = TRUE)[1]
key <- substr(line, 1, eq_pos - 1)
value <- substr(line, eq_pos + 1, nchar(line))
if (grepl("^name\\.", key)) {
idx <- as.integer(sub("^name\\.", "", key)) + 1L
names_list[[idx]] <- value
} else {
meta[[key]] <- value
}
}
# Convert numeric fields
meta$version <- as.integer(meta$version)
meta$rows <- as.integer(meta$rows)
meta$cols <- as.integer(meta$cols)
meta$names <- unlist(names_list)
meta
}