diff --git a/DESCRIPTION b/DESCRIPTION
index 8a070ff28..2ecffdba1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: TreeTools
 Title: Create, Modify and Analyse Phylogenetic Trees
-Version: 2.3.0.9001
+Version: 2.3.0.9002
 Authors@R: c(
   person("Martin R.", 'Smith', role = c("aut", "cre", "cph"), 
          email = "martin.smith@durham.ac.uk",
@@ -63,6 +63,7 @@ Config/Needs/memcheck: pkgdown, testthat
 Config/Needs/metadata: codemeta
 Config/Needs/revdeps: revdepcheck
 Config/Needs/website: pkgdown
+Config/roxygen2/version: 8.0.0
 Config/testthat/parallel: false
 Config/testthat/edition: 3
 LinkingTo: Rcpp
@@ -73,4 +74,3 @@ Encoding: UTF-8
 Language: en-GB
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
-Config/roxygen2/version: 8.0.0
diff --git a/NAMESPACE b/NAMESPACE
index 7f34d68f7..e3501d5e7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -372,6 +372,7 @@ export(NewickTree)
 export(NeworderPhylo)
 export(NeworderPruningwise)
 export(NexusTokens)
+export(NexusTokensToInteger)
 export(NodeDepth)
 export(NodeNumbers)
 export(NodeOrder)
diff --git a/NEWS.md b/NEWS.md
index 072e61091..cb2166e7e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,9 +1,17 @@
-# TreeTools 2.3.0.9001 (development) #
+# TreeTools 2.3.0.9002 (development) #
 
 ## New features
 
+- `NexusTokensToInteger()` converts character data to integers,
+  mapping uncertain tokens to `NA`.
 - `ReadTntCharacters()` attaches an `xgroup` attribute (factor) when a TNT
-  `xgroup` partition block is present, replacing the standalone `ReadXgroup()`.
+  `xgroup` partition block is present, replacing the stand-alone `ReadXgroup()`.
+
+## Fixes
+
+- `NexusTokens()` once again handles polymorphism tokens with internal
+  whitespace (e.g. `(1 2)`, `{0 1}`).
+
 
 # TreeTools 2.3.0 (2026-04-22) #
 
@@ -11,8 +19,7 @@
 
 - `ReadTntCharacters()` now handles multi-line comments, bare `&` continuations,
   `@taxonomy` suffixes, name-only taxon lines, mid-line `xread`, smart-quote
-  names (Windows-1252), and packed multi-taxon lines; all 13 Goloboff (2019)
-  corpus files parse cleanly.
+  names (Windows-1252), and packed multi-taxon lines.
 
 ## Performance
 
diff --git a/R/parse_files.R b/R/parse_files.R
index 7201460fd..978757cd6 100644
--- a/R/parse_files.R
+++ b/R/parse_files.R
@@ -1011,6 +1011,75 @@ PhyDat <- function(dataset) {
   MatrixToPhyDat(mat)
 }
 
+#' Convert Nexus token matrix to integer
+#'
+#' `NexusTokensToInteger()` converts the character matrix returned by
+#' [`ReadCharacters()`] to an integer matrix, mapping polymorphic,
+#' ambiguous (`?`), and inapplicable (`-`) tokens to `NA_integer_` or to the
+#' first/last state listed in the polymorphism, depending on `polymorphism`.
+#'
+#' Only digit states `0`..`9` are recognised; non-digit symbols (and any
+#' token whose interior contains no digits) become `NA_integer_`.
+#' Polymorphism extraction (`polymorphism = "first"`/`"last"`) likewise
+#' considers digits only.
+#'
+#' If `tokens` is a `phyDat` object it is first converted via
+#' [`PhyDatToMatrix()`] with `ambigNA = TRUE, inappNA = TRUE`, so that
+#' fully-ambiguous and inapplicable rows become `NA_integer_` and only
+#' true partial polymorphisms are subject to the `polymorphism` rule.
+#'
+#' @param tokens Character matrix as returned by [`ReadCharacters()`], a
+#' character vector as returned by [`NexusTokens()`], or a `phyDat` object.
+#' @param polymorphism Character string specifying how to handle polymorphic
+#' tokens such as `"(01)"` or `"{12}"`:
+#' \describe{
+#'   \item{`"?"` (default)}{Treat as the NEXUS missing-data token: map to
+#'     `NA_integer_`.}
+#'   \item{`"first"`}{Use the first state digit inside the brackets.}
+#'   \item{`"last"`}{Use the last state digit inside the brackets.}
+#' }
+#' Tokens `"?"` and `"-"` always map to `NA_integer_` regardless of
+#' `polymorphism`.
+#'
+#' @return An integer matrix (or vector) with the same dimensions and
+#' `dimnames` as `tokens`.
+#'
+#' @examples
+#' tokens <- matrix(c("0", "(12)", "1", "?", "-"),
+#'                  nrow = 1,
+#'                  dimnames = list("Taxon_A", paste0("C", 1:5)))
+#' NexusTokensToInteger(tokens)
+#' NexusTokensToInteger(tokens, polymorphism = "first")
+#'
+#' @family phylogenetic matrix conversion functions
+#' @template MRS
+#' @export
+NexusTokensToInteger <- function(tokens,
+                                 polymorphism = c("?", "first", "last")) {
+  polymorphism <- match.arg(polymorphism)
+  if (inherits(tokens, "phyDat")) {
+    tokens <- PhyDatToMatrix(tokens, ambigNA = TRUE, inappNA = TRUE)
+  }
+  at <- attributes(tokens)
+
+  x <- as.character(tokens)
+  result <- suppressWarnings(as.integer(x))
+
+  ambig <- is.na(result) & !is.na(x) & x != "?" & x != "-"
+  if (polymorphism != "?" && any(ambig)) {
+    pattern <- if (polymorphism == "first") "\\d" else "\\d(?=[^\\d]*$)"
+    m <- regexpr(pattern, x[ambig], perl = TRUE)
+    matched <- regmatches(x[ambig], m)
+    digits <- rep(NA_character_, sum(ambig))
+    digits[m != -1L] <- matched
+    result[ambig] <- suppressWarnings(as.integer(digits))
+  }
+
+  attributes(result) <- at
+  result
+}
+
+
 #' Rightmost character of string
 #'
 #' `RightmostCharacter()` is a convenience function that returns the final
diff --git a/man/Decompose.Rd b/man/Decompose.Rd
index 24a69d1f6..1461d624e 100644
--- a/man/Decompose.Rd
+++ b/man/Decompose.Rd
@@ -73,6 +73,7 @@ NumberOfChars(decomposed) # 116 characters in decomposed
 \seealso{
 Other phylogenetic matrix conversion functions:
 \code{\link[=MatrixToPhyDat]{MatrixToPhyDat()}},
+\code{\link[=NexusTokensToInteger]{NexusTokensToInteger()}},
 \code{\link[=Reweight]{Reweight()}},
 \code{\link[=StringToPhyDat]{StringToPhyDat()}}
 }
diff --git a/man/MatrixToPhyDat.Rd b/man/MatrixToPhyDat.Rd
index ea3b4645d..b57517b05 100644
--- a/man/MatrixToPhyDat.Rd
+++ b/man/MatrixToPhyDat.Rd
@@ -68,6 +68,7 @@ head(PhyDatToMatrix(Lobo.phy)[, 91:93])
 \seealso{
 Other phylogenetic matrix conversion functions:
 \code{\link[=Decompose]{Decompose()}},
+\code{\link[=NexusTokensToInteger]{NexusTokensToInteger()}},
 \code{\link[=Reweight]{Reweight()}},
 \code{\link[=StringToPhyDat]{StringToPhyDat()}}
 }
diff --git a/man/NexusTokensToInteger.Rd b/man/NexusTokensToInteger.Rd
new file mode 100644
index 000000000..4ccbf16bf
--- /dev/null
+++ b/man/NexusTokensToInteger.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parse_files.R
+\name{NexusTokensToInteger}
+\alias{NexusTokensToInteger}
+\title{Convert Nexus token matrix to integer}
+\usage{
+NexusTokensToInteger(tokens, polymorphism = c("?", "first", "last"))
+}
+\arguments{
+\item{tokens}{Character matrix as returned by \code{\link[=ReadCharacters]{ReadCharacters()}}, a
+character vector as returned by \code{\link[=NexusTokens]{NexusTokens()}}, or a \code{phyDat} object.}
+
+\item{polymorphism}{Character string specifying how to handle polymorphic
+tokens such as \code{"(01)"} or \code{"{12}"}:
+\describe{
+\item{\code{"?"} (default)}{Treat as the NEXUS missing-data token: map to
+\code{NA_integer_}.}
+\item{\code{"first"}}{Use the first state digit inside the brackets.}
+\item{\code{"last"}}{Use the last state digit inside the brackets.}
+}
+Tokens \code{"?"} and \code{"-"} always map to \code{NA_integer_} regardless of
+\code{polymorphism}.}
+}
+\value{
+An integer matrix (or vector) with the same dimensions and
+\code{dimnames} as \code{tokens}.
+}
+\description{
+\code{NexusTokensToInteger()} converts the character matrix returned by
+\code{\link[=ReadCharacters]{ReadCharacters()}} to an integer matrix, mapping polymorphic,
+ambiguous (\verb{?}), and inapplicable (\code{-}) tokens to \code{NA_integer_} or to the
+first/last state listed in the polymorphism, depending on \code{polymorphism}.
+}
+\details{
+Only digit states \code{0}..\code{9} are recognised; non-digit symbols (and any
+token whose interior contains no digits) become \code{NA_integer_}.
+Polymorphism extraction (\code{polymorphism = "first"}/\code{"last"}) likewise
+considers digits only.
+
+If \code{tokens} is a \code{phyDat} object it is first converted via
+\code{\link[=PhyDatToMatrix]{PhyDatToMatrix()}} with \verb{ambigNA = TRUE, inappNA = TRUE}, so that
+fully-ambiguous and inapplicable rows become \code{NA_integer_} and only
+true partial polymorphisms are subject to the \code{polymorphism} rule.
+}
+\examples{
+tokens <- matrix(c("0", "(12)", "1", "?", "-"),
+                 nrow = 1,
+                 dimnames = list("Taxon_A", paste0("C", 1:5)))
+NexusTokensToInteger(tokens)
+NexusTokensToInteger(tokens, polymorphism = "first")
+
+}
+\seealso{
+Other phylogenetic matrix conversion functions:
+\code{\link[=Decompose]{Decompose()}},
+\code{\link[=MatrixToPhyDat]{MatrixToPhyDat()}},
+\code{\link[=Reweight]{Reweight()}},
+\code{\link[=StringToPhyDat]{StringToPhyDat()}}
+}
+\author{
+\href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
+(\href{mailto:martin.smith@durham.ac.uk}{martin.smith@durham.ac.uk})
+}
+\concept{phylogenetic matrix conversion functions}
diff --git a/man/PhyToString.Rd b/man/PhyToString.Rd
index ec58c9f6a..021b30e3d 100644
--- a/man/PhyToString.Rd
+++ b/man/PhyToString.Rd
@@ -97,6 +97,7 @@ PhyToString(phyDat, concatenate = FALSE)
 Other phylogenetic matrix conversion functions:
 \code{\link[=Decompose]{Decompose()}},
 \code{\link[=MatrixToPhyDat]{MatrixToPhyDat()}},
+\code{\link[=NexusTokensToInteger]{NexusTokensToInteger()}},
 \code{\link[=Reweight]{Reweight()}}
 }
 \author{
diff --git a/man/Reweight.Rd b/man/Reweight.Rd
index 6bf37e24e..db3d6e894 100644
--- a/man/Reweight.Rd
+++ b/man/Reweight.Rd
@@ -69,6 +69,7 @@ Reweight(dat, c("3" = 0, "2" = 2))
 Other phylogenetic matrix conversion functions:
 \code{\link[=Decompose]{Decompose()}},
 \code{\link[=MatrixToPhyDat]{MatrixToPhyDat()}},
+\code{\link[=NexusTokensToInteger]{NexusTokensToInteger()}},
 \code{\link[=StringToPhyDat]{StringToPhyDat()}}
 }
 \author{
diff --git a/tests/testthat/test-parsers.R b/tests/testthat/test-parsers.R
index d59a98f2b..71b6fdbe7 100644
--- a/tests/testthat/test-parsers.R
+++ b/tests/testthat/test-parsers.R
@@ -32,6 +32,134 @@ test_that("Nexus file can be parsed", {
   expect_equal(3L, unique(as.integer(read[3, ])))
 })
 
+test_that("ReadCharacters handles Cingulata-style polymorphism with internal whitespace", {
+  nexusContent <- "#NEXUS
+BEGIN CHARACTERS;
+  DIMENSIONS NCHAR=5;
+  FORMAT DATATYPE=STANDARD MISSING=? GAP=-;
+  MATRIX
+    Taxon_A  0(1 2)1?-
+    Taxon_B  1{0 1}0?-
+    Taxon_C  01010
+  ;"
+
+  tf <- tempfile(fileext = ".nex")
+  on.exit(unlink(tf))
+  writeLines(nexusContent, tf)
+  read <- ReadCharacters(tf)
+
+  expect_equal(dim(read), c(3L, 5L))
+  expect_equal(unname(read["Taxon_A", 2]), "(12)")
+  expect_equal(unname(read["Taxon_B", 2]), "{01}")
+  expect_equal(unname(read["Taxon_A", 4]), "?")
+  expect_equal(unname(read["Taxon_A", 5]), "-")
+  expect_equal(unname(read["Taxon_C", 1]), "0")
+
+  # Continuation lines: data for a taxon split across multiple lines
+  nexusMulti <- "#NEXUS
+BEGIN CHARACTERS;
+  DIMENSIONS NCHAR=6;
+  FORMAT DATATYPE=STANDARD MISSING=? GAP=-;
+  MATRIX
+    Taxon_A  0(1 2)1
+              ?-1
+    Taxon_B  1{0 1}0
+              ?-0
+  ;"
+
+  tf2 <- tempfile(fileext = ".nex")
+  on.exit(unlink(tf2), add = TRUE)
+  writeLines(nexusMulti, tf2)
+  readMulti <- ReadCharacters(tf2)
+
+  expect_equal(dim(readMulti), c(2L, 6L))
+  expect_equal(unname(readMulti["Taxon_A", 2]), "(12)")
+  expect_equal(unname(readMulti["Taxon_B", 2]), "{01}")
+  expect_equal(unname(readMulti["Taxon_A", 6]), "1")
+  expect_equal(unname(readMulti["Taxon_B", 6]), "0")
+
+  # Round-trip the ReadCharacters() matrix through NexusTokensToInteger().
+  intMat <- NexusTokensToInteger(read)
+  expect_equal(dim(intMat), c(3L, 5L))
+  expect_equal(unname(intMat["Taxon_A", 1]), 0L)
+  expect_equal(unname(intMat["Taxon_A", 2]), NA_integer_) # polymorphism -> ?
+  expect_equal(unname(intMat["Taxon_B", 2]), NA_integer_) # uncertainty -> ?
+  expect_equal(unname(intMat["Taxon_A", 4]), NA_integer_) # ?
+  expect_equal(unname(intMat["Taxon_A", 5]), NA_integer_) # -
+  expect_equal(unname(intMat["Taxon_C", 5]), 0L)
+  expect_equal(unname(NexusTokensToInteger(read, "first")["Taxon_A", 2]), 1L)
+  expect_equal(unname(NexusTokensToInteger(read, "last")["Taxon_A", 2]), 2L)
+})
+
+test_that("NexusTokensToInteger() converts token matrix to integer", {
+  tokens <- matrix(c("0", "(12)", "1", "?", "-"),
+                   nrow = 1L,
+                   dimnames = list("Tax", paste0("C", 1:5)))
+
+  # Default: polymorphisms and ambiguities become NA
+  result <- NexusTokensToInteger(tokens)
+  expect_equal(dim(result), c(1L, 5L))
+  expect_equal(dimnames(result), list("Tax", paste0("C", 1:5)))
+  expect_equal(unname(result["Tax", "C1"]), 0L)
+  expect_equal(unname(result["Tax", "C2"]), NA_integer_)
+  expect_equal(unname(result["Tax", "C3"]), 1L)
+  expect_equal(unname(result["Tax", "C4"]), NA_integer_)
+  expect_equal(unname(result["Tax", "C5"]), NA_integer_)
+
+  # polymorphism = "first": take first digit inside brackets
+  result_f <- NexusTokensToInteger(tokens, polymorphism = "first")
+  expect_equal(unname(result_f["Tax", "C2"]), 1L)
+  expect_equal(unname(result_f["Tax", "C4"]), NA_integer_)
+
+  # polymorphism = "last": take last digit inside brackets
+  result_l <- NexusTokensToInteger(tokens, polymorphism = "last")
+  expect_equal(unname(result_l["Tax", "C2"]), 2L)
+
+  # Braces form {01}
+  tokens2 <- matrix(c("{01}", "0"), nrow = 1L, dimnames = list("T1", c("C1", "C2")))
+  expect_equal(unname(NexusTokensToInteger(tokens2)["T1", "C1"]), NA_integer_)
+  expect_equal(unname(NexusTokensToInteger(tokens2, "first")["T1", "C1"]), 0L)
+  expect_equal(unname(NexusTokensToInteger(tokens2, "last")["T1", "C1"]), 1L)
+
+  # Named vector input (no dim attribute).
+  vec <- c(a = "0", b = "(12)", c = "?", d = "1")
+  vecOut <- NexusTokensToInteger(vec)
+  expect_null(dim(vecOut))
+  expect_equal(names(vecOut), c("a", "b", "c", "d"))
+  expect_equal(unname(vecOut), c(0L, NA_integer_, NA_integer_, 1L))
+  expect_equal(unname(NexusTokensToInteger(vec, "first")), c(0L, 1L, NA_integer_, 1L))
+  expect_equal(unname(NexusTokensToInteger(vec, "last")), c(0L, 2L, NA_integer_, 1L))
+
+  # No-digit polymorphism token: must not crash, must yield NA in every mode.
+  noDigit <- matrix(c("(AB)", "0"), nrow = 1L,
+                    dimnames = list("T", c("C1", "C2")))
+  expect_silent(NexusTokensToInteger(noDigit))
+  expect_equal(unname(NexusTokensToInteger(noDigit)["T", "C1"]), NA_integer_)
+  expect_equal(unname(NexusTokensToInteger(noDigit, "first")["T", "C1"]),
+               NA_integer_)
+  expect_equal(unname(NexusTokensToInteger(noDigit, "last")["T", "C1"]),
+               NA_integer_)
+  expect_equal(unname(NexusTokensToInteger(noDigit, "first")["T", "C2"]), 0L)
+
+  # state.labels and other matrix attributes round-trip through the result.
+  tokens3 <- matrix(c("0", "(12)", "1"), nrow = 1L,
+                    dimnames = list("Tax", c("C1", "C2", "C3")))
+  attr(tokens3, "state.labels") <- list(c("absent", "present"),
+                                        c("a", "b", "c"),
+                                        c("absent", "present"))
+  out3 <- NexusTokensToInteger(tokens3)
+  expect_equal(attr(out3, "state.labels"), attr(tokens3, "state.labels"))
+
+  # phyDat input routes through PhyDatToMatrix(ambigNA = TRUE, inappNA = TRUE).
+  phy <- MatrixToPhyDat(matrix(c("0", "(12)", "1", "?", "-"),
+                               nrow = 1L,
+                               dimnames = list("Tax", paste0("C", 1:5))))
+  viaPhy <- NexusTokensToInteger(phy)
+  viaMat <- NexusTokensToInteger(PhyDatToMatrix(phy, ambigNA = TRUE,
+                                                inappNA = TRUE))
+  expect_equal(unname(viaPhy), unname(viaMat))
+})
+
 test_that("NexusTokens() fails gracefully", {
   expect_error(NexusTokens("0123012301230123", integer(0)))
   expect_equal("Character number must be between 1 and 16.",