Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 40 additions & 36 deletions _bibliography/papers.bib
Original file line number Diff line number Diff line change
Expand Up @@ -45,28 +45,7 @@ @article{PhysRev.47.777
pdf={example_pdf.pdf}
}

% BlastNet -- SenSys 2022 conference paper (Best Paper Finalist).
% NOTE(review): abbr, selected, award, pdf, html and slides are not standard
% BibTeX fields; presumably they are consumed by the site's publication
% template -- confirm before renaming or removing any of them.
@article{ling2022blastnet,
  abbr={SenSys'22},
  selected={true},
  title={BlastNet: Exploiting Duo-Blocks for Cross-Processor Real-Time DNN Inference},
  author={Ling, Neiwen and Huang, Xuan and Zhao, Zhihe and Guan, Nan and Yan, Zhenyu and Xing, Guoliang},
  journal={The 20th ACM Conference on Embedded Networked Sensor Systems (ACM SenSys 2022)},
  year={2022},
  award={Best Paper Finalist},
  abstract={In recent years, Deep Neural Network (DNN) has been increasingly adopted by a wide range of time-critical applications running on edge platforms with heterogeneous multiprocessors. To meet the stringent timing requirements of these applications, heterogeneous CPU and GPU resources must be efficiently utilized for the inference of multiple DNN models. Such a cross-processor real-time DNN inference paradigm poses major challenges due to the inherent performance imbalance among different processors and the lack of real-time support for cross-processor inference from existing deep learning frameworks. In this work, we propose a new system named BlastNet that exploits duo-block - a new model inference abstraction to support highly efficient cross-processor real-time DNN inference. Each duo-block has a dual model structure, enabling efficient fine-grained inference alternatively across different processors. BlastNet employs a novel block-level Neural Architecture Search (NAS) technique to generate duo-blocks, which accounts for computing characteristics and communication overhead. The duo-blocks are optimized at design time and then dynamically scheduled to achieve high resource utilization of heterogeneous CPU and GPU at runtime. BlastNet is implemented on an indoor autonomous driving platform and three popular edge platforms. Extensive results show that BlastNet achieves 35.07% less deadline missing rate with a mere 1.63% of model accuracy loss.},
  pdf={blastnet_sensys2022.pdf},
  html={https://dl.acm.org/doi/abs/10.1145/3560905.3568520},
  slides={Blastnet-slides.pdf}
}


@article{2024timelyllm,
abbr={MobiSys'26},
Expand Down Expand Up @@ -96,6 +75,21 @@ @article{2026vexact
html={https://arxiv.org/abs/2605.14220},
}

% TypeFly -- arXiv preprint (arXiv:2312.14950).
% The journal field keeps the human-readable arXiv label exactly as before;
% eprint/archiveprefix carry the same identifier in machine-readable form.
% NOTE(review): abbr, pdf and html are not standard BibTeX fields; presumably
% they are consumed by the site's publication template -- confirm.
@article{2024typefly,
  abbr={arxiv},
  title={TypeFly: Flying Drones with Large Language Model},
  author={Chen, Guojun and Yu, Xiaojing and Ling, Neiwen and Zhong, Lin},
  abstract={Recent advancements in robot control using large language models (LLMs) have demonstrated significant potential, primarily due to LLMs' capabilities to understand natural language commands and generate executable plans in various languages. However, in real-time and interactive applications involving mobile robots, particularly drones, the sequential token generation process inherent to LLMs introduces substantial latency, i.e. response time, in control plan generation. In this paper, we present a system called TypeFly that tackles this problem using a combination of a novel programming language called MiniSpec and its runtime to reduce the plan generation time and drone response time. That is, instead of asking an LLM to write a program (robotic plan) in the popular but verbose Python, TypeFly gets it to do it in MiniSpec specially designed for token efficiency and stream interpretation. Using a set of challenging drone tasks, we show that design choices made by TypeFly can reduce up to 62% response time and provide a more consistent user experience, enabling responsive and intelligent LLM-based drone control with efficient completion.},
  year={2024},
  pdf={typefly.pdf},
  journal={arXiv preprint arXiv:2312.14950},
  eprint={2312.14950},
  archiveprefix={arXiv},
  html={https://arxiv.org/abs/2312.14950},
}


@article{ling2021rt,
abbr={SenSys'21},
title={RT-mDL: Supporting Real-Time Mixed Deep Learning Tasks on Edge Platforms},
Expand Down Expand Up @@ -133,6 +127,29 @@ @article{2024soar
html={https://dl.acm.org/doi/10.1145/3636534.3649352},
}

% BlastNet -- SenSys 2022 conference paper (Best Paper Finalist).
% NOTE(review): abbr, selected, award, pdf, html and slides are not standard
% BibTeX fields; presumably they are consumed by the site's publication
% template -- confirm before renaming or removing any of them.
@article{ling2022blastnet,
  abbr={SenSys'22},
  selected={true},
  title={BlastNet: Exploiting Duo-Blocks for Cross-Processor Real-Time DNN Inference},
  author={Ling, Neiwen and Huang, Xuan and Zhao, Zhihe and Guan, Nan and Yan, Zhenyu and Xing, Guoliang},
  journal={The 20th ACM Conference on Embedded Networked Sensor Systems (ACM SenSys 2022)},
  year={2022},
  award={Best Paper Finalist},
  abstract={In recent years, Deep Neural Network (DNN) has been increasingly adopted by a wide range of time-critical applications running on edge platforms with heterogeneous multiprocessors. To meet the stringent timing requirements of these applications, heterogeneous CPU and GPU resources must be efficiently utilized for the inference of multiple DNN models. Such a cross-processor real-time DNN inference paradigm poses major challenges due to the inherent performance imbalance among different processors and the lack of real-time support for cross-processor inference from existing deep learning frameworks. In this work, we propose a new system named BlastNet that exploits duo-block - a new model inference abstraction to support highly efficient cross-processor real-time DNN inference. Each duo-block has a dual model structure, enabling efficient fine-grained inference alternatively across different processors. BlastNet employs a novel block-level Neural Architecture Search (NAS) technique to generate duo-blocks, which accounts for computing characteristics and communication overhead. The duo-blocks are optimized at design time and then dynamically scheduled to achieve high resource utilization of heterogeneous CPU and GPU at runtime. BlastNet is implemented on an indoor autonomous driving platform and three popular edge platforms. Extensive results show that BlastNet achieves 35.07% less deadline missing rate with a mere 1.63% of model accuracy loss.},
  pdf={blastnet_sensys2022.pdf},
  html={https://dl.acm.org/doi/abs/10.1145/3560905.3568520},
  slides={Blastnet-slides.pdf}
}

@article{ling2025tmc,
abbr={TMC'25},
title={Time-sensitive Multi-DNN Inference on CPU-GPU Edge Platforms},
Expand Down Expand Up @@ -165,19 +182,6 @@ @article{liu2024sensys
year={2024},
}

% TypeFly -- arXiv preprint (arXiv:2312.14950).
% The journal field keeps the human-readable arXiv label exactly as before;
% eprint/archiveprefix carry the same identifier in machine-readable form.
% NOTE(review): abbr, pdf and html are not standard BibTeX fields; presumably
% they are consumed by the site's publication template -- confirm.
@article{2024typefly,
  abbr={arxiv},
  title={TypeFly: Flying Drones with Large Language Model},
  author={Chen, Guojun and Yu, Xiaojing and Ling, Neiwen and Zhong, Lin},
  abstract={Recent advancements in robot control using large language models (LLMs) have demonstrated significant potential, primarily due to LLMs' capabilities to understand natural language commands and generate executable plans in various languages. However, in real-time and interactive applications involving mobile robots, particularly drones, the sequential token generation process inherent to LLMs introduces substantial latency, i.e. response time, in control plan generation. In this paper, we present a system called TypeFly that tackles this problem using a combination of a novel programming language called MiniSpec and its runtime to reduce the plan generation time and drone response time. That is, instead of asking an LLM to write a program (robotic plan) in the popular but verbose Python, TypeFly gets it to do it in MiniSpec specially designed for token efficiency and stream interpretation. Using a set of challenging drone tasks, we show that design choices made by TypeFly can reduce up to 62% response time and provide a more consistent user experience, enabling responsive and intelligent LLM-based drone control with efficient completion.},
  year={2024},
  pdf={typefly.pdf},
  journal={arXiv preprint arXiv:2312.14950},
  eprint={2312.14950},
  archiveprefix={arXiv},
  html={https://arxiv.org/abs/2312.14950},
}

@article{jiang2023coedge,
abbr={IPSN'23},
Expand All @@ -198,7 +202,7 @@ @article{2025chatfly
selected={true},
title={TypeFly: Low-Latency Drone Planning with Large Language Models},
author={Chen, Guojun and Yu, Xiaojing and Ling, Neiwen and Zhong, Lin},
abstract={Commanding a drone with a natural language is not only user-friendly but also opens the door for emerging language agents to control the drone. Emerging large language models (LLMs) provide a previously impossible opportunity to automatically translate a task description in a natural language to a program that can be executed by the drone. However, powerful LLMs and their vision counterparts are limited in three important ways. First, they are only available as cloud-based services. Sending images to the cloud raises privacy concerns. Second, they are expensive, costing proportionally to the request size. Finally, without expensive fine-tuning, existing LLMs are quite limited in their capability of writing a program for specialized systems like drones. In this paper, we present a system called TypeFly that tackles the above three problems using a combination of edge-based vision intelligence, novel programming language design, and prompt engineering. Instead of the familiar Python, TypeFly gets a cloud-based LLM service to write a program in a small, custom language called MiniSpec, based on task and scene descriptions in English. Such MiniSpec programs are not only succinct (and therefore efficient) but also able to consult the LLM during their execution using a special skill called query. Using a set of increasingly challenging drone tasks, we show that design choices made by TypeFly can reduce both the cost of LLM service and the task execution time by more than 2x. More importantly, query and prompt engineering techniques contributed by TypeFly significantly improve the chance of success of complex tasks.},
abstract={Recent advancements in robot planning using large language models (LLMs) have demonstrated significant potential, primarily due to LLMs' capabilities to understand natural language commands and generate executable plans in various languages. However, in time-sensitive and interactive applications involving mobile robots, particularly drones, the sequential token generation process inherent to LLMs introduces substantial latency, i.e. response time, during the control plan generation. In this paper, we present a system called TypeFly that tackles this latency problem using a combination of a novel programming language called MiniSpec and its runtime to reduce both the response time and generation time for the robot plan. That is, instead of asking an LLM to write a program (robotic plan) in the popular but verbose Python, TypeFly gets it to do it in MiniSpec specially designed for token efficiency and stream interpreting. Using a set of challenging drone tasks, we show that design choices made by TypeFly can reduce the average response time to 74% compared to existing works and provide a more consistent user experience, enabling responsive and intelligent LLM-based drone control.},
year={2025},
journal={IEEE Transactions on Mobile Computing (IEEE TMC 2025)},
html={https://ieeexplore.ieee.org/document/10970379},
Expand Down
Loading