Publications | Songyang Zhang

2025

ACL

Capability Salience Vector: Fine-grained Alignment of Loss and Capabilities for Downstream Task Scaling Law

Qiming Ge, Shuhao Xing, Songyang Gao, Yunhua Zhou, Yicheng Zou, and 6 more authors

In Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL), 2025

@inproceedings{ge2025capability,
  title     = {Capability Salience Vector: Fine-grained Alignment of Loss and Capabilities for Downstream Task Scaling Law},
  author    = {Ge, Qiming and Xing, Shuhao and Gao, Songyang and Zhou, Yunhua and Zou, Yicheng and Zhang, Songyang and Chen, Zhi and Yan, Hang and Zhang, Qi and Guo, Qipeng and Chen, Kai},
  booktitle = {Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025}
}

ACL

OpenHuEval: Evaluating Large Language Model on Hungarian Specifics

Haote Yang, Xingjian Wei, Jiang Wu, Noémi Ligeti-Nagy, Jiaxing Sun, and 16 more authors

In Findings of the Association for Computational Linguistics (ACL), 2025

Bib

@inproceedings{yang2025openhueval,
  title     = {{OpenHuEval}: Evaluating Large Language Model on {Hungarian} Specifics},
  author    = {Yang, Haote and Wei, Xingjian and Wu, Jiang and Ligeti-Nagy, Noémi and Sun, Jiaxing and Wang, Yinfan and Yang, Zijian Győző and Gao, Junyuan and Wang, Jingchao and Jiang, Bowen and Wang, Shasha and Yu, Nanjun and Zhang, Zihao and Hong, Shixin and Liu, Hongwei and Li, Wei and Zhang, Songyang and Lin, Dahua and Wu, Lijun and Prószéky, Gábor and He, Conghui},
  booktitle = {Findings of the Association for Computational Linguistics (ACL)},
  year      = {2025}
}

ACL

Are Your LLMs Capable of Stable Reasoning?

Junnan Liu, Hongwei Liu, Linchen Xiao, Ziyi Wang, Kuikun Liu, and 4 more authors

In Findings of the Association for Computational Linguistics (ACL), 2025

Bib

@inproceedings{liu2025stable,
  title     = {Are Your {LLMs} Capable of Stable Reasoning?},
  author    = {Liu, Junnan and Liu, Hongwei and Xiao, Linchen and Wang, Ziyi and Liu, Kuikun and Gao, Songyang and Zhang, Wenwei and Zhang, Songyang and Chen, Kai},
  booktitle = {Findings of the Association for Computational Linguistics (ACL)},
  year      = {2025}
}

ACL

Condor: Enhance LLM Alignment with Knowledge-Driven Data Synthesis and Refinement

Maosong Cao, Taolin Zhang, Mo Li, Chuyu Zhang, Yunxin Liu, and 3 more authors

In Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL), 2025

Bib

@inproceedings{cao2025condor,
  title     = {{Condor}: Enhance {LLM} Alignment with Knowledge-Driven Data Synthesis and Refinement},
  author    = {Cao, Maosong and Zhang, Taolin and Li, Mo and Zhang, Chuyu and Liu, Yunxin and Duan, Haodong and Zhang, Songyang and Chen, Kai},
  booktitle = {Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2025}
}

AAAI

UrBench: A Comprehensive Benchmark for Evaluating Large Multimodal Models in Multi-View Urban Scenarios

Baichuan Zhou, Haote Yang, Dairong Chen, Junyan Ye, Tianyi Bai, and 5 more authors

In Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), 2025

Bib

@inproceedings{zhou2025urbench,
  title     = {{UrBench}: A Comprehensive Benchmark for Evaluating Large Multimodal Models in Multi-View Urban Scenarios},
  author    = {Zhou, Baichuan and Yang, Haote and Chen, Dairong and Ye, Junyan and Bai, Tianyi and Yu, Jinhua and Zhang, Songyang and Lin, Dahua and He, Conghui and Li, Weijia},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2025}
}

2024

NeurIPS

GTA: A Benchmark for General Tool Agents

Jize Wang, Zerun Ma, Yining Li, Songyang Zhang, Cailian Chen, and 2 more authors

In Advances in Neural Information Processing Systems (NeurIPS), 2024

Bib

@inproceedings{wang2024gta,
  title     = {{GTA}: A Benchmark for General Tool Agents},
  author    = {Wang, Jize and Ma, Zerun and Li, Yining and Zhang, Songyang and Chen, Cailian and Chen, Kai and Le, Xinyi},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2024}
}

NeurIPS

InternLM-XComposer2-4KHD: A Pioneering Large Vision-Language Model Handling Resolutions from 336 Pixels to 4K HD

Xiaoyi Dong, Pan Zhang, Yuhang Zang, Yuhang Cao, Bin Wang, and 19 more authors

In Advances in Neural Information Processing Systems (NeurIPS), 2024

Bib

@inproceedings{dong2024internlmxcomposer2_4khd,
  title     = {{InternLM-XComposer2-4KHD}: A Pioneering Large Vision-Language Model Handling Resolutions from 336 Pixels to {4K HD}},
  author    = {Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Wang, Bin and Ouyang, Linke and Zhang, Songyang and Duan, Haodong and Zhang, Wenwei and Li, Yining and Yan, Hang and Gao, Yang and Chen, Zhe and Zhang, Xinyue and Li, Wei and Li, Jingwen and Wang, Wenhai and Chen, Kai and He, Conghui and Zhang, Xingcheng and Dai, Jifeng and Qiao, Yu and Lin, Dahua and Wang, Jiaqi},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2024}
}

NeurIPS

Prism: A Framework for Decoupling and Assessing the Capabilities of VLMs

Yuxuan Qiao, Haodong Duan, Xinyu Fang, Junming Yang, Lin Chen, and 4 more authors

In Advances in Neural Information Processing Systems (NeurIPS), 2024

Bib

@inproceedings{qiao2025prism,
  title     = {{Prism}: A Framework for Decoupling and Assessing the Capabilities of {VLMs}},
  author    = {Qiao, Yuxuan and Duan, Haodong and Fang, Xinyu and Yang, Junming and Chen, Lin and Zhang, Songyang and Wang, Jiaqi and Lin, Dahua and Chen, Kai},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2024}
}

EMNLP

ProSA: Assessing and Understanding the Prompt Sensitivity of LLMs

Jingming Zhuo, Songyang Zhang, Xinyu Fang, Haodong Duan, Dahua Lin, and 1 more author

In Findings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2024

Bib

@inproceedings{zhuo2024prosa,
  title     = {{ProSA}: Assessing and Understanding the Prompt Sensitivity of {LLMs}},
  author    = {Zhuo, Jingming and Zhang, Songyang and Fang, Xinyu and Duan, Haodong and Lin, Dahua and Chen, Kai},
  booktitle = {Findings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
}

EMNLP

LawBench: Benchmarking Legal Knowledge of Large Language Models

Zhiwei Fei, Xiaoyu Shen, Dawei Zhu, Fengzhe Zhou, Zhuo Han, and 4 more authors

In Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2024

Bib Code

@inproceedings{fei2023lawbench,
  title     = {{LawBench}: Benchmarking Legal Knowledge of Large Language Models},
  author    = {Fei, Zhiwei and Shen, Xiaoyu and Zhu, Dawei and Zhou, Fengzhe and Han, Zhuo and Zhang, Songyang and Chen, Kai and Shen, Zongwen and Ge, Jidong},
  booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2024},
}

ACL

Benchmarking Chinese Commonsense Reasoning of LLMs: From Chinese-Specifics to Reasoning-Memorization Correlations

Jiaxing Sun, Weiquan Huang, Jiang Wu, Chenya Gu, Wei Li, and 3 more authors

In Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL), 2024

Bib

@inproceedings{sun2024benchmarking,
  title     = {Benchmarking {Chinese} Commonsense Reasoning of {LLMs}: From {Chinese}-Specifics to Reasoning-Memorization Correlations},
  author    = {Sun, Jiaxing and Huang, Weiquan and Wu, Jiang and Gu, Chenya and Li, Wei and Zhang, Songyang and Yan, Hang and He, Conghui},
  booktitle = {Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2024}
}

ACL

MathBench: Evaluating the Theory and Application Proficiency of LLMs with a Hierarchical Mathematics Benchmark

Hongwei Liu, Zilong Zheng, Yuxuan Qiao, Haodong Duan, Zhiwei Fei, and 5 more authors

In Findings of the Association for Computational Linguistics (ACL), 2024

Bib

@inproceedings{liu2024mathbench,
  title     = {{MathBench}: Evaluating the Theory and Application Proficiency of {LLMs} with a Hierarchical Mathematics Benchmark},
  author    = {Liu, Hongwei and Zheng, Zilong and Qiao, Yuxuan and Duan, Haodong and Fei, Zhiwei and Zhou, Fengzhe and Zhang, Wenwei and Zhang, Songyang and Lin, Dahua and Chen, Kai},
  booktitle = {Findings of the Association for Computational Linguistics (ACL)},
  year      = {2024}
}

ACL

LLaST: Improved End-to-end Speech Translation System Leveraged by Large Language Models

Xi Chen, Songyang Zhang, Qibing Bai, Kai Chen, and Satoshi Nakamura

In Findings of the Association for Computational Linguistics (ACL), 2024

Bib

@inproceedings{chen2024llast,
  title     = {{LLaST}: Improved End-to-end Speech Translation System Leveraged by Large Language Models},
  author    = {Chen, Xi and Zhang, Songyang and Bai, Qibing and Chen, Kai and Nakamura, Satoshi},
  booktitle = {Findings of the Association for Computational Linguistics (ACL)},
  year      = {2024}
}

ACL

T-Eval: Evaluating the Tool Utilization Capability of Large Language Models Step by Step

Zehui Chen, Weihua Du, Wenwei Zhang, Kuikun Liu, Jiangning Liu, and 6 more authors

In Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL), 2024

Bib Code

@inproceedings{chen2024t,
  title     = {{T-Eval}: Evaluating the Tool Utilization Capability of Large Language Models Step by Step},
  author    = {Chen, Zehui and Du, Weihua and Zhang, Wenwei and Liu, Kuikun and Liu, Jiangning and Zheng, Miao and Zhuo, Jingming and Zhang, Songyang and Lin, Dahua and Chen, Kai and Zhao, Feng},
  booktitle = {Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2024}
}

ArXiv

InternLM2 Technical Report

Zheng Cai, Maosong Cao, Haojiong Chen, Kai Chen, Keyu Chen, and 95 more authors

2024

Bib Code

@misc{cai2024internlm2,
  title        = {{InternLM2} Technical Report},
  author       = {Cai, Zheng and Cao, Maosong and Chen, Haojiong and Chen, Kai and Chen, Keyu and Chen, Xin and Chen, Xun and Chen, Zehui and Chen, Zhi and Chu, Pei and Dong, Xiaoyi and Duan, Haodong and Fan, Qi and Fei, Zhaoye and Gao, Yang and Ge, Jiaye and Gu, Chenya and Gu, Yuzhe and Gui, Tao and Guo, Aijia and Guo, Qipeng and He, Conghui and Hu, Yingfan and Huang, Ting and Jiang, Tao and Jiao, Penglong and Jin, Zhenjiang and Lei, Zhikai and Li, Jiaxing and Li, Jingwen and Li, Linyang and Li, Shuaibin and Li, Wei and Li, Yining and Liu, Hongwei and Liu, Jiangning and Hong, Jiawei and Liu, Kaiwen and Liu, Kuikun and Liu, Xiaoran and Lv, Chengqi and Lv, Haijun and Lv, Kai and Ma, Li and Ma, Runyuan and Ma, Zerun and Ning, Wenchang and Ouyang, Linke and Qiu, Jiantao and Qu, Yuan and Shang, Fukai and Shao, Yunfan and Song, Demin and Song, Zifan and Sui, Zhihao and Sun, Peng and Sun, Yu and Tang, Huanze and Wang, Bin and Wang, Guoteng and Wang, Jiaqi and Wang, Jiayu and Wang, Rui and Wang, Yudong and Wang, Ziyi and Wei, Xingjian and Weng, Qizhen and Wu, Fan and Xiong, Yingtong and Xu, Chao and Xu, Ruiliang and Yan, Hang and Yan, Yirong and Yang, Xiaogui and Ye, Haochen and Ying, Huaiyuan and Yu, Jia and Yu, Jing and Zang, Yuhang and Zhang, Chuyu and Zhang, Li and Zhang, Pan and Zhang, Peng and Zhang, Ruijie and Zhang, Shuo and Zhang, Songyang and Zhang, Wenjian and Zhang, Wenwei and Zhang, Xingcheng and Zhang, Xinyue and Zhao, Hui and Zhao, Qian and Zhao, Xiaomeng and Zhou, Fengzhe and Zhou, Zaida and Zhuo, Jingming and Zou, Yicheng and Qiu, Xipeng and Qiao, Yu and Lin, Dahua},
  howpublished = {arXiv preprint},
  year         = {2024},
}

ArXiv

InternLM-XComposer2: Mastering Free-form Text-Image Composition and Comprehension in Vision-Language Large Model

Xiaoyi Dong, Pan Zhang, Yuhang Zang, Yuhang Cao, Bin Wang, and 18 more authors

2024

Bib Code

@misc{dong2024internlmxcomposer2,
  title        = {{InternLM-XComposer2}: Mastering Free-form Text-Image Composition and Comprehension in Vision-Language Large Model},
  author       = {Dong, Xiaoyi and Zhang, Pan and Zang, Yuhang and Cao, Yuhang and Wang, Bin and Ouyang, Linke and Wei, Xilin and Zhang, Songyang and Duan, Haodong and Cao, Maosong and Zhang, Wenwei and Li, Yining and Yan, Hang and Gao, Yang and Zhang, Xinyue and Li, Wei and Li, Jingwen and Chen, Kai and He, Conghui and Zhang, Xingcheng and Qiao, Yu and Lin, Dahua and Wang, Jiaqi},
  howpublished = {arXiv preprint},
  year         = {2024},
}

CVPR

From Pixels to Graphs: Open-Vocabulary Scene Graph Generation with Vision-Language Models

Rongjie Li, Songyang Zhang, Dahua Lin, Kai Chen, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2024

NAACL

Fake Alignment: Are LLMs Really Aligned Well?

Yixu Wang, Yan Teng, Kexin Huang, Chengqi Lyu, Songyang Zhang, and 3 more authors

In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), 2024

Bib

@inproceedings{wang2023fake,
  title     = {Fake Alignment: Are {LLMs} Really Aligned Well?},
  author    = {Wang, Yixu and Teng, Yan and Huang, Kexin and Lyu, Chengqi and Zhang, Songyang and Zhang, Wenwei and Ma, Xingjun and Wang, Yingchun},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2024},
}

T-PAMI

SGTR+: End-to-end Scene Graph Generation with Transformer

Rongjie Li, Songyang Zhang, and Xuming He

In IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2024

Bib

@article{li2023sgtrplus,
  title   = {{SGTR+}: End-to-end Scene Graph Generation with Transformer},
  author  = {Li, Rongjie and Zhang, Songyang and He, Xuming},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
  year    = {2024},
}

NAACL

BotChat: Evaluating LLMs’ Capabilities of Having Multi-Turn Dialogues

Haodong Duan, Jueqi Wei, Chonghua Wang, Hongwei Liu, Yixiao Fang, and 3 more authors

In Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL), 2024

Bib Code

@inproceedings{duan2023botchat,
  title     = {{BotChat}: Evaluating {LLMs}' Capabilities of Having Multi-Turn Dialogues},
  author    = {Duan, Haodong and Wei, Jueqi and Wang, Chonghua and Liu, Hongwei and Fang, Yixiao and Zhang, Songyang and Lin, Dahua and Chen, Kai},
  booktitle = {Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL)},
  year      = {2024},
}

ECCV

MMBench: Is Your Multi-modal Model an All-around Player?

Yuan Liu, Haodong Duan, Yuanhan Zhang, Bo Li, Songyang Zhang, and 7 more authors

In Proceedings of the European Conference on Computer Vision (ECCV), 2024

Bib HTML Code

@inproceedings{liu2023mmbench,
  title     = {{MMBench}: Is Your Multi-modal Model an All-around Player?},
  author    = {Liu, Yuan and Duan, Haodong and Zhang, Yuanhan and Li, Bo and Zhang, Songyang and Zhao, Wangbo and Yuan, Yike and Wang, Jiaqi and He, Conghui and Liu, Ziwei and Chen, Kai and Lin, Dahua},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2024},
}

TMLR

PixMIM: Rethinking Pixel Reconstruction in Masked Image Modeling

Yuan Liu, Songyang Zhang, Jiacheng Chen, Kai Chen, and Dahua Lin

In Transactions on Machine Learning Research (TMLR), 2024

Bib HTML Code

@article{liu2023pixmim,
  title   = {{PixMIM}: Rethinking Pixel Reconstruction in Masked Image Modeling},
  author  = {Liu, Yuan and Zhang, Songyang and Chen, Jiacheng and Chen, Kai and Lin, Dahua},
  journal = {Transactions on Machine Learning Research (TMLR)},
  year    = {2024},
}

2023

ArXiv

InternLM-XComposer: A Vision-Language Large Model for Advanced Text-image Comprehension and Composition

Pan Zhang, Xiaoyi Dong, Bin Wang, Yuhang Cao, Chao Xu, and 16 more authors

2023

Bib Code

@misc{zhang2023internlmxcomposer,
  title        = {{InternLM-XComposer}: A Vision-Language Large Model for Advanced Text-image Comprehension and Composition},
  author       = {Zhang, Pan and Dong, Xiaoyi and Wang, Bin and Cao, Yuhang and Xu, Chao and Ouyang, Linke and Zhao, Zhiyuan and Duan, Haodong and Zhang, Songyang and Ding, Shuangrui and Zhang, Wenwei and Yan, Hang and Zhang, Xinyue and Li, Wei and Li, Jingwen and Chen, Kai and He, Conghui and Zhang, Xingcheng and Qiao, Yu and Lin, Dahua and Wang, Jiaqi},
  howpublished = {arXiv preprint},
  year         = {2023},
}

IJCAI

TG-VQA: Ternary Game of Video Question Answering

Hao Li, Peng Jin, Zesen Cheng, Songyang Zhang, Kai Chen, and 3 more authors

In Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2023

Bib HTML

@inproceedings{li2023tgvqa,
  title     = {{TG-VQA}: Ternary Game of Video Question Answering},
  author    = {Li, Hao and Jin, Peng and Cheng, Zesen and Zhang, Songyang and Chen, Kai and Wang, Zhennan and Liu, Chang and Chen, Jie},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year      = {2023},
}

ICCV

Improving Pixel-based MIM by Reducing Wasted Modeling Capability

Yuan Liu, Songyang Zhang, Jiacheng Chen, Zhaohui Yu, Kai Chen, and 1 more author

In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2023

Bib HTML Code

@inproceedings{liu2023mff,
  title     = {Improving Pixel-based {MIM} by Reducing Wasted Modeling Capability},
  author    = {Liu, Yuan and Zhang, Songyang and Chen, Jiacheng and Yu, Zhaohui and Chen, Kai and Lin, Dahua},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  year      = {2023},
}

CVPR

RIFormer: Keep Your Vision Backbone Effective But Removing Token Mixer

Jiahao Wang, Songyang Zhang, Yong Liu, Taiqiang Wu, Yujiu Yang, and 4 more authors

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2023

Bib HTML Code

@inproceedings{wang2022riformer,
  title     = {{RIFormer}: Keep Your Vision Backbone Effective But Removing Token Mixer},
  author    = {Wang, Jiahao and Zhang, Songyang and Liu, Yong and Wu, Taiqiang and Yang, Yujiu and Liu, Xihui and Chen, Kai and Luo, Ping and Lin, Dahua},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023},
}

2022

ECCV

Learning Semantic Correspondence with Sparse Annotations

Shuaiyi Huang, Luyu Yang, Bo He, Songyang Zhang, Xuming He, and 1 more author

In Proceedings of the European Conference on Computer Vision (ECCV), 2022

Bib HTML Code

@inproceedings{huang2022learning,
  title     = {Learning Semantic Correspondence with Sparse Annotations},
  author    = {Huang, Shuaiyi and Yang, Luyu and He, Bo and Zhang, Songyang and He, Xuming and Shrivastava, Abhinav},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2022},
}

ECCV

Action Quality Assessment with Temporal Parsing Transformer

Yang Bai, Desen Zhou, Songyang Zhang, Jian Wang, Errui Ding, and 2 more authors

In Proceedings of the European Conference on Computer Vision (ECCV), 2022

Bib HTML

@inproceedings{bai2022action,
  title     = {Action Quality Assessment with Temporal Parsing Transformer},
  author    = {Bai, Yang and Zhou, Desen and Zhang, Songyang and Wang, Jian and Ding, Errui and Long, Yang and Wang, Jingdong},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2022},
}

CVPR

SGTR: End-to-end Scene Graph Generation with Transformer

Rongjie Li, Songyang Zhang, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022

Bib HTML Code

@inproceedings{li2022sgtr,
  title     = {{SGTR}: End-to-end Scene Graph Generation with Transformer},
  author    = {Li, Rongjie and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2022},
}

2021

NeurIPS

Dynamic Grained Encoder for Vision Transformers

Lin Song*, Songyang Zhang*, Songtao Liu, Zeming Li, Xuming He, and 3 more authors

In Advances in Neural Information Processing Systems (NeurIPS), 2021

Bib HTML Code

@inproceedings{lin2021dynamic,
  title     = {Dynamic Grained Encoder for Vision Transformers},
  author    = {Song, Lin and Zhang, Songyang and Liu, Songtao and Li, Zeming and He, Xuming and Sun, Hongbin and Sun, Jian and Zheng, Nanning},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2021},
  annote    = {Authors marked with * on the publication page: Lin Song and Songyang Zhang},
}

ACM MM

An EM Framework for Online Incremental Learning of Semantic Segmentation

Shipeng Yan*, Jiale Zhou*, Jiangwei Xie, Songyang Zhang, and Xuming He

In Proceedings of the 29th ACM International Conference on Multimedia (ACM MM), 2021

Bib HTML

@inproceedings{yan2021anem,
  title     = {An {EM} Framework for Online Incremental Learning of Semantic Segmentation},
  author    = {Yan, Shipeng and Zhou, Jiale and Xie, Jiangwei and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the 29th ACM International Conference on Multimedia (ACM MM)},
  year      = {2021},
  annote    = {Authors marked with * on the publication page: Shipeng Yan and Jiale Zhou},
}

IJCAI

Learning Implicit Temporal Alignment for Few-shot Video Classification

Songyang Zhang*, Jiale Zhou*, and Xuming He

In Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2021

Bib HTML

@inproceedings{zhang2021learning,
  title     = {Learning Implicit Temporal Alignment for Few-shot Video Classification},
  author    = {Zhang, Songyang and Zhou, Jiale and He, Xuming},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year      = {2021},
  annote    = {Authors marked with * on the publication page: Songyang Zhang and Jiale Zhou},
}

CVPR

Bipartite Graph Network with Adaptive Message Passing for Unbiased Scene Graph Generation

Rongjie Li, Songyang Zhang, Bo Wan, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021

Bib HTML Code

@inproceedings{li2021bipartite,
  title     = {Bipartite Graph Network with Adaptive Message Passing for Unbiased Scene Graph Generation},
  author    = {Li, Rongjie and Zhang, Songyang and Wan, Bo and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2021},
}

CVPR

Distribution Alignment: A Unified Framework for Long-tail Visual Recognition

Songyang Zhang, Zeming Li, Shipeng Yan, Xuming He, and Jian Sun

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2021

Bib HTML Code

@inproceedings{zhang2021distribution,
  title     = {Distribution Alignment: A Unified Framework for Long-tail Visual Recognition},
  author    = {Zhang, Songyang and Li, Zeming and Yan, Shipeng and He, Xuming and Sun, Jian},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2021},
}

2020

InterSpeech

Transformer with Bidirectional Decoder for Speech Recognition

Xi Chen, Songyang Zhang, Dandan Song, Peng Ouyang, and Shouyi Yin

In The Conference of the International Speech Communication Association (InterSpeech), 2020

Bib HTML

@inproceedings{chen2020transformer,
  title     = {Transformer with Bidirectional Decoder for Speech Recognition},
  author    = {Chen, Xi and Zhang, Songyang and Song, Dandan and Ouyang, Peng and Yin, Shouyi},
  booktitle = {The Conference of the International Speech Communication Association (InterSpeech)},
  year      = {2020},
}

ECCV

Part-aware Prototype Network for Few-shot Semantic Segmentation

Yongfei Liu*, Xiangyi Zhang*, Songyang Zhang, and Xuming He

In Proceedings of the European Conference on Computer Vision (ECCV), 2020

Bib HTML Code

@inproceedings{liu2020part,
  title     = {Part-aware Prototype Network for Few-shot Semantic Segmentation},
  author    = {Liu, Yongfei and Zhang, Xiangyi and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  year      = {2020},
  annote    = {Authors marked with * on the publication page: Yongfei Liu and Xiangyi Zhang},
}

2019

ICML

LatentGNN: Learning Efficient Non-local Relations for Visual Recognition

Songyang Zhang, Shipeng Yan, and Xuming He

In Proceedings of the 36th International Conference on Machine Learning (ICML), 2019

Bib HTML Code

@inproceedings{zhang2019latent,
  title     = {{LatentGNN}: Learning Efficient Non-local Relations for Visual Recognition},
  author    = {Zhang, Songyang and Yan, Shipeng and He, Xuming},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning (ICML)},
  year      = {2019},
}

AAAI

A Dual Attention Network With Semantic Embedding for Few-shot Learning

Shipeng Yan*, Songyang Zhang*, and Xuming He

In Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), 2019

Bib HTML

@inproceedings{yan2019adual,
  title     = {A Dual Attention Network With Semantic Embedding for Few-shot Learning},
  author    = {Yan, Shipeng and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2019},
  annote    = {Authors marked with * on the publication page: Shipeng Yan and Songyang Zhang},
}

ICCV

Dynamic Context Correspondence Network for Semantic Alignment

Shuaiyi Huang, Qiuyue Wang, Songyang Zhang, and Xuming He

In Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV), 2019

Bib HTML Code

@inproceedings{huang2019dynamic,
  title     = {Dynamic Context Correspondence Network for Semantic Alignment},
  author    = {Huang, Shuaiyi and Wang, Qiuyue and Zhang, Songyang and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
  year      = {2019},
}

2017

CVPR

Predicting Salient Face in Multiple-face Videos

Yufan Liu, Songyang Zhang, Mai Xu, and Xuming He

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2017

Bib HTML Code

@inproceedings{liu2017predicting,
  title     = {Predicting Salient Face in Multiple-face Videos},
  author    = {Liu, Yufan and Zhang, Songyang and Xu, Mai and He, Xuming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2017},
}