% Conference paper: IEEE/CVF International Conference on Computer Vision proceedings, 2025.
@inproceedings{zhang2025umu,
  author    = {Zhang, Wanpeng and
               Feng, Yicheng and
               Luo, Hao and
               Li, Yijiang and
               Yue, Zihao and
               Zheng, Sipeng and
               Lu, Zongqing},
  title     = {Unified Multimodal Understanding via Byte-Pair Visual Encoding},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2025},
}