diff --git a/_bibliography/papers.bib b/_bibliography/papers.bib index 97d973e..34d05d8 100755 --- a/_bibliography/papers.bib +++ b/_bibliography/papers.bib @@ -56,7 +56,6 @@ @article{2024timelyllm TimelyLLM introduces novel mechanisms of segmented generation and scheduling that optimally leverage redundancy between robot plan generation and execution phases. We report an implementation of TimelyLLM on a widely-used LLM serving framework and evaluate it on a range of robotic applications. Our evaluation shows that TimelyLLM improves the time utility up to 1.97x, and reduces the overall waiting time by 84%.}, year={2026}, - month={June}, featured={Featured Paper}, pdf={timelyllm.pdf}, journal={The 24th ACM International Conference on Mobile Systems, Applications, and Services (ACM MobiSys 2026)}, @@ -70,7 +69,6 @@ @article{2026vexact author={Zhong(co-primary), Tianle and Ling(co-primary), Neiwen and Pi, Yifan and Wei, Zijun and Yu, Tianshu and Fox, Geoffrey and Wu, Peng and Yu, Xiao}, abstract={Modern LLM RL systems separate rollout generation from policy optimization. These two stages are expected to produce token probabilities that match exactly. However, implementation differences can make them assign different values to the same sequence under the same model weights, inducing Training-Inference Mismatch (TIM). TIM is difficult to inspect because it is entangled with off-policy drift and common stabilization mechanisms. In this work, we isolate TIM in a zero-mismatch diagnostic setting (VeXact), and show that small token-level numerical disagreements can independently cause training collapse. We further show that TIM changes the effective optimization problem, and identify a set of remedies that could mitigate TIM. Our results suggest that TIM is not benign numerical noise, but a systems-level perturbation that should be treated as a first-order factor in analyzing LLM RL stability.}, year={2026}, - month={May}, journal={arXiv preprint arXiv:2605.14220}, html={https://arxiv.org/abs/2605.14220}, }