From b1729bdda98a01e6194e666eb50007beed0a052e Mon Sep 17 00:00:00 2001 From: SHAWNERZZ Date: Sun, 7 Jun 2026 22:21:21 -0700 Subject: [PATCH] Fix live-converge word casing (stuck all-caps + dropped auto-cap) Casing bugs specific to two-thumb live-converge / multi-part re-recognition, where every extending tap or swipe REPLACES the whole composing word with the recognizer's fresh (lowercase) output: - Dropped auto-cap (#5): "Hello" -> "hellow" on the first extension. - Stuck all-caps (#4): a short ambiguous swipe whose top pick is an all-caps acronym ("CSA") set WordComposer.isAllUpperCase, which forced every later suggestion upper, so the word stuck in caps ("CSA"->"CAN"->"CAME"). - Swipe-extension downcasing: "Was"+swipe -> "wait" (a second swipe re-entered onStartBatchInput and re-captured the now-cleared shift state). Approach: separate a word's casing INTENT from the recognizer's letters. - WordComposer.mCapitalizedMode is the persistent per-word intent: seeded at word start from auto-cap + shift, it survives the setBatchInputWord rebuild and is cleared only at commitWord. Exposed via getCapitalizedMode(). - New InputLogic.applyComposingCase(lemma, capsMode, locale) treats the recognizer output as a casing-NEUTRAL lemma (lowercased first) and re-applies the intent. Lowercasing first dissolves #4 at the source: the composing word is never all-caps, so isAllUpperCase never arms. - onStartBatchInput captures the intent only for a FRESH word (gated on !extendComposingWord), so an extending gesture preserves the first fragment's intent instead of re-capturing the auto-cleared shift state (#5 / "Wait"). - Fresh-word gesture capitalization is unchanged (still mShiftModeAtGestureStart, captured before any state mutates), so plain glide typing is byte-identical and a standalone acronym swipe ("CSA") still stays as-is. Tests: applyComposingCase covered directly (pure, native-free) plus intent lifecycle tests in InputLogicTest. 
:app:testOfflineDebugUnitTest green except the 3 documented pre-existing failures. Fixes #4, #5. Co-Authored-By: Claude Opus 4.8 --- .../keyboard/latin/WordComposer.java | 13 +++ .../keyboard/latin/inputlogic/InputLogic.java | 98 ++++++++++++++++--- .../keyboard/latin/InputLogicTest.kt | 85 ++++++++++++++++ 3 files changed, 182 insertions(+), 14 deletions(-) diff --git a/app/src/main/java/helium314/keyboard/latin/WordComposer.java b/app/src/main/java/helium314/keyboard/latin/WordComposer.java index e8d93e519..a7e1b8034 100644 --- a/app/src/main/java/helium314/keyboard/latin/WordComposer.java +++ b/app/src/main/java/helium314/keyboard/latin/WordComposer.java @@ -431,6 +431,19 @@ public void setCapitalizedModeAtStartComposingTime(final int mode) { mCapitalizedMode = mode; } + /** + * The capitalization intent captured when this word started composing (one of the + * {@code CAPS_MODE_*} constants). It is seeded from auto-cap + shift state at word-start, + * survives {@link #reset()} (so it persists across a {@link #setBatchInputWord} rebuild), and + * is cleared only in {@link #commitWord}. This makes it the persistent per-word source of + * truth for casing — used by the live-converge re-recognition path so that re-replacing the + * word never loses (or latches) its case. + * @return the capitalized mode for the current word + */ + public int getCapitalizedMode() { + return mCapitalizedMode; + } + /** * Before fetching suggestions, we don't necessarily know about the capitalized mode yet. *

diff --git a/app/src/main/java/helium314/keyboard/latin/inputlogic/InputLogic.java b/app/src/main/java/helium314/keyboard/latin/inputlogic/InputLogic.java index 975f571a8..7c322d658 100644 --- a/app/src/main/java/helium314/keyboard/latin/inputlogic/InputLogic.java +++ b/app/src/main/java/helium314/keyboard/latin/inputlogic/InputLogic.java @@ -115,10 +115,11 @@ public final class InputLogic { // GESTURE-START, not gesture-end (so a long gesture doesn't lose the promotion). private boolean mGestureExtendsByTapPromotion; - // Snapshot of {@code keyboardSwitcher.getKeyboardShiftMode()} captured at the start of - // each gesture. Used by {@link #onUpdateTailBatchInputCompleted} to capitalize the - // recognizer's lowercase output. We can't read getKeyboardShiftMode() at gesture-end - // because the keyboard typically auto-clears the shifted state during the gesture. + // Snapshot of {@code keyboardSwitcher.getKeyboardShiftMode()} captured at the very start of + // each gesture, BEFORE any state mutates (a prior word may auto-commit / the shift indicator + // may auto-clear within onStartBatchInput). Used ONLY by the FRESH-word capitalization in + // {@link #onUpdateTailBatchInputCompleted}. The live-converge / multi-part EXTEND path uses the + // persistent {@link WordComposer#getCapitalizedMode()} instead — see that method's javadoc. private int mShiftModeAtGestureStart = WordComposer.CAPS_MODE_OFF; /** Set to true at the end of {@link #onCombiningGraceExpired} when an autospace was * written, so the next punctuation tap in {@link #handleSeparatorEvent} can strip it @@ -733,9 +734,10 @@ public void onStartBatchInput(final SettingsValues settingsValues, markForceNextSpaceWordStarted(); // Snapshot the keyboard's shift mode BEFORE any state mutates — the shifted indicator // typically auto-clears once the gesture starts moving, so by the time - // onUpdateTailBatchInputCompleted fires the live mode reads as UNSHIFTED. 
- // We compute the *actual* caps mode (resolves AUTO_SHIFTED into AUTO_SHIFT_LOCKED if - // the input field is in all-caps), so a true all-caps field gives the right answer. + // onUpdateTailBatchInputCompleted fires the live mode reads as UNSHIFTED. This drives the + // FRESH-word capitalization only; the EXTEND path uses WordComposer.mCapitalizedMode (the + // persistent per-word intent). We compute the *actual* caps mode (resolves AUTO_SHIFTED + // into AUTO_SHIFT_LOCKED if the input field is all-caps) so a true all-caps field is right. mShiftModeAtGestureStart = getActualCapsMode(settingsValues, keyboardSwitcher.getKeyboardShiftMode()); mWordBeingCorrectedByCursor = null; mInputLogicHandler.onStartBatchInput(); @@ -855,8 +857,19 @@ public void onStartBatchInput(final SettingsValues settingsValues, } } mConnection.endBatchEdit(); - mWordComposer.setCapitalizedModeAtStartComposingTime( - getActualCapsMode(settingsValues, keyboardSwitcher.getKeyboardShiftMode())); + // Capture the word's casing intent ONLY when this gesture starts a fresh word. When it + // EXTENDS an existing composing word (multi-part swipe+swipe / manual spacing), the intent + // belongs to the word's first fragment and must be preserved: the keyboard auto-clears its + // shifted indicator after the first gesture, so re-capturing here would read UNSHIFTED and + // wrongly downcase a word that started capitalized ("Was"+swipe -> "wait" instead of + // "Wait"). mCapitalizedMode survives the setBatchInputWord rebuild and is cleared at + // commitWord, so leaving it untouched keeps the original intent alive across the extension. + // (Live-converge tap extensions bypass onStartBatchInput entirely, so they were already + // safe; this closes the same gap for swipe extensions.) 
+ if (!extendComposingWord) { + mWordComposer.setCapitalizedModeAtStartComposingTime( + getActualCapsMode(settingsValues, keyboardSwitcher.getKeyboardShiftMode())); + } } /* @@ -3743,6 +3756,44 @@ private static boolean isPlainLetterWord(final String s) { return true; } + /** + * Apply a word's casing INTENT to a casing-neutral lemma. Used by the live-converge + * (merged-trail) re-recognition path, which replaces the whole composing word on every + * extending tap. The recognizer's output is treated as letters only — its own casing (e.g. an + * all-caps dictionary acronym that the engine happened to rank first) is discarded by + * lowercasing — and the case is then re-derived from the persistent per-word intent + * ({@link WordComposer#getCapitalizedMode()}, seeded at word-start from auto-cap + shift and + * alive until commit). This keeps a sentence-start capital across every re-converge (issue #5) + * and, because the lemma is lowercased first, prevents an unsolicited all-caps result from + * latching the whole word in caps (issue #4). + * + * <p>Pure function of its inputs (no engine / native lib), so the casing behaviour is unit + * testable directly. {@code public static} for that reason. + * + * @param lemma the recognizer's word output (casing not trusted) + * @param capitalizedMode one of the {@link WordComposer} {@code CAPS_MODE_*} constants + * @param locale locale for case mapping + * @return the lemma cased to match the intent + */ + public static String applyComposingCase(final String lemma, final int capitalizedMode, + final Locale locale) { + if (lemma == null || lemma.isEmpty()) return lemma; + final String lower = lemma.toLowerCase(locale); + switch (capitalizedMode) { + case WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED: + case WordComposer.CAPS_MODE_MANUAL_SHIFT_LOCKED: + // Deliberate caps-lock (or an all-caps input field) — uppercase the whole word. + return lower.toUpperCase(locale); + case WordComposer.CAPS_MODE_AUTO_SHIFTED: + case WordComposer.CAPS_MODE_MANUAL_SHIFTED: + // Sentence-start / shift — first letter only. + return StringUtils.capitalizeFirstCodePoint(lower, locale); + default: + // CAPS_MODE_OFF — no intent, leave the neutral lemma lowercase. + return lower; + } + } + private boolean textBeforeCursorMayBeUrlOrSimilar(final SettingsValues settingsValues, final Boolean forAutoSpace) { // URL / mail field and no space -> may be URL if (InputTypeUtils.isUriOrEmailType(settingsValues.mInputAttributes.mInputType) && @@ -3892,9 +3943,12 @@ public void onUpdateTailBatchInputCompleted(final SettingsValues settingsValues, // those continuation gestures should append in the casing the user already chose for // the start of the word. if (!extendExistingCompose && !batchInputText.isEmpty()) { - // Use the shift mode captured at gesture-start, not the live mode — the - // keyboard auto-clears the shifted indicator during the gesture, so a live - // read here always returns UNSHIFTED.
+ // Fresh-word capitalization: use the shift mode captured at gesture-start, not the live + // mode — the keyboard auto-clears the shifted indicator during the gesture, so a live + // read here always returns UNSHIFTED. This is the long-standing path for a plain single + // gesture; it ADDS a capital to the recognizer's lowercase output and deliberately does + // NOT neutralize an intrinsic all-caps result, so a standalone acronym swipe ("CSA") + // stays as-is. (The EXTEND path below handles re-cased re-recognition separately.) final int shiftMode = mShiftModeAtGestureStart; if (shiftMode == WordComposer.CAPS_MODE_MANUAL_SHIFTED || shiftMode == WordComposer.CAPS_MODE_AUTO_SHIFTED) { @@ -3904,8 +3958,24 @@ public void onUpdateTailBatchInputCompleted(final SettingsValues settingsValues, batchInputText = batchInputText.toUpperCase(settingsValues.mLocale); } } - // Clear so a stale value from a previous gesture can't leak into a non-gesture - // commit later. + // Live-converge (#1.7) casing — gated to the merged-trail re-recognition path, so a plain + // single gesture (incl. a standalone acronym swipe, handled by the block above) is + // untouched. A merged-trail commit REPLACES the whole word with the recognizer's fresh + // output on every extending tap, which otherwise: + // - dropped the first-letter capital the word started with (issue #5); and + // - could latch an unsolicited all-caps acronym ("CSA"), which then stuck the whole word + // in caps via WordComposer.isAllUpperCase forcing every later suggestion upper (#4). + // The fix treats the recognizer output as a casing-NEUTRAL lemma and re-applies the word's + // persistent intent (WordComposer.mCapitalizedMode). Lowercasing the lemma first is what + // dissolves #4 at the source: the composing word is never all-caps, so isAllUpperCase + // never arms and Suggest stops force-uppercasing — no shift-lock special-case needed. 
+ if (usedMergedTrail) { + batchInputText = applyComposingCase(batchInputText, mWordComposer.getCapitalizedMode(), + settingsValues.mLocale); + } + // Clear the fresh-word snapshot so a stale value from this gesture can't leak into a + // later non-gesture commit. (The persistent extend intent lives in mCapitalizedMode and + // is cleared by WordComposer.commitWord, not here.) mShiftModeAtGestureStart = WordComposer.CAPS_MODE_OFF; final String composedText = prevTypedWord + batchInputText; if (settingsValues.mGestureDebugDrawPoints) { diff --git a/app/src/test/java/helium314/keyboard/latin/InputLogicTest.kt b/app/src/test/java/helium314/keyboard/latin/InputLogicTest.kt index c55350a58..66ade1c57 100644 --- a/app/src/test/java/helium314/keyboard/latin/InputLogicTest.kt +++ b/app/src/test/java/helium314/keyboard/latin/InputLogicTest.kt @@ -692,6 +692,91 @@ class InputLogicTest { assertFalse(composer.isExtendBatchInputBaseSet) } + // --- Live-converge casing (#4 stuck all-caps, #5 dropped auto-cap). The merged-trail + // re-recognition path replaces the whole composing word on every extending tap; its casing is + // derived from the persistent per-word intent (WordComposer.mCapitalizedMode) applied to a + // casing-NEUTRAL lemma, via InputLogic.applyComposingCase. That transform is a pure function + // (no native engine), so the casing behaviour is testable here directly — the on-device + // recognition that produces the lemma is not (no gesture lib / tap coords in the JVM harness). + private val enLocale = "en".constructLocale() + + // #5: a word that started capitalized (sentence-start / shift) keeps its leading capital when + // the recognizer re-resolves it lowercase on an extending tap. "Hello" must not become "hellow". 
+ @Test fun applyComposingCaseKeepsLeadingCapitalAcrossReconverge() { + assertEquals("Hellow", + InputLogic.applyComposingCase("hellow", WordComposer.CAPS_MODE_AUTO_SHIFTED, enLocale)) + assertEquals("Hellow", + InputLogic.applyComposingCase("hellow", WordComposer.CAPS_MODE_MANUAL_SHIFTED, enLocale)) + } + + // #4: an unsolicited all-caps recognizer result ("CSA") is neutralized to the word's intent + // instead of latching. Sentence-start intent -> "Can"; no intent (mid-sentence) -> "can". + // This is the exact case the old prior-word heuristic got wrong (it produced "can" at a + // sentence start because it could only see the previous all-caps fragment). + @Test fun applyComposingCaseNeutralizesUnsolicitedAllCaps() { + assertEquals("Can", + InputLogic.applyComposingCase("CAN", WordComposer.CAPS_MODE_AUTO_SHIFTED, enLocale)) + assertEquals("can", + InputLogic.applyComposingCase("CAN", WordComposer.CAPS_MODE_OFF, enLocale)) + } + + // Deliberate caps-lock (or an all-caps input field) still produces all-caps on the + // merged-trail path — the one case where the whole word legitimately stays upper. + @Test fun applyComposingCaseUppercasesUnderShiftLock() { + assertEquals("CAME", + InputLogic.applyComposingCase("came", WordComposer.CAPS_MODE_MANUAL_SHIFT_LOCKED, enLocale)) + assertEquals("CAME", + InputLogic.applyComposingCase("CAME", WordComposer.CAPS_MODE_AUTO_SHIFT_LOCKED, enLocale)) + } + + // No casing intent: the neutral lemma is left lowercase regardless of the recognizer's own + // casing, so a mid-sentence re-converge never injects stray capitals. + @Test fun applyComposingCaseLeavesLowercaseWhenNoIntent() { + assertEquals("game", + InputLogic.applyComposingCase("Game", WordComposer.CAPS_MODE_OFF, enLocale)) + assertEquals("game", + InputLogic.applyComposingCase("game", WordComposer.CAPS_MODE_OFF, enLocale)) + } + + // Defensive: empty / null lemma passes through untouched (the caller also guards, but the + // helper must be safe on its own). 
+ @Test fun applyComposingCaseHandlesEmptyAndNull() { + assertEquals("", InputLogic.applyComposingCase("", WordComposer.CAPS_MODE_AUTO_SHIFTED, enLocale)) + assertEquals(null, InputLogic.applyComposingCase(null, WordComposer.CAPS_MODE_AUTO_SHIFTED, enLocale)) + } + + // An EXTENDING gesture (swipe+swipe / manual-spacing multi-part) must NOT re-capture the + // word's casing intent at onStartBatchInput. The keyboard auto-clears its shift indicator + // after the first gesture, so re-capturing would overwrite the word-start intent with + // UNSHIFTED and downcase a capitalized word ("Was"+swipe -> "wait" instead of "Wait", the + // on-device regression this guards). Intent must survive to the merged-trail casing step. + // (Live-converge tap extensions bypass onStartBatchInput, so they were never affected.) + @Test fun extendingGestureStartPreservesCasingIntent() { + reset() + latinIME.prefs().edit { putBoolean(Settings.PREF_GESTURE_MANUAL_SPACING, true) } + chainInput("wa") // open a composing word, cursor at end + // Simulate the word having started while shifted (as "Was" did on-device). + composer.setCapitalizedModeAtStartComposingTime(WordComposer.CAPS_MODE_MANUAL_SHIFTED) + // A second gesture starts to EXTEND it (manual spacing -> extendComposingWord = true). + inputLogic.onStartBatchInput(settingsValues, KeyboardSwitcher.getInstance(), latinIME.mHandler) + handleMessages() + // Pre-fix this was clobbered to CAPS_MODE_OFF by the unconditional re-capture. + assertEquals(WordComposer.CAPS_MODE_MANUAL_SHIFTED, composer.capitalizedMode) + } + + // Sanity: a FRESH gesture (no composing word) still captures the current intent — the guard + // must not freeze a stale mode from a previous word. 
+ @Test fun freshGestureStartStillCapturesCasingIntent() { + reset() + latinIME.prefs().edit { putBoolean(Settings.PREF_GESTURE_MANUAL_SPACING, true) } + composer.setCapitalizedModeAtStartComposingTime(WordComposer.CAPS_MODE_MANUAL_SHIFT_LOCKED) + // No composing word -> extendComposingWord = false -> intent is re-captured from the + // keyboard (not shift-locked on a fresh field), not frozen at the stale locked value. + inputLogic.onStartBatchInput(settingsValues, KeyboardSwitcher.getInstance(), latinIME.mHandler) + handleMessages() + assertFalse(composer.capitalizedMode == WordComposer.CAPS_MODE_MANUAL_SHIFT_LOCKED) + } + // Static-seed reachability guard. PointerTracker's tap-seed path (sLastLetterTap*) is gated // on (!isMultipartComposeActive() && mCombiningGraceMs > 0). But grace > 0 forces multi-part // composition active, so that conjunction is unsatisfiable and the seed is currently