@inproceedings{hu-etal-2023-llm,
    title = "{LLM}-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models",
    author = "Hu, Zhiqiang and Wang, Lei and Lan, Yihuai and Xu, Wanyu and Lim, Ee-Peng and Bing, Lidong and Xu, Xing and Poria, Soujanya and Lee, Roy",
    editor = "Bouamor, Houda and Pino, Juan and Bali, Kalika",
    booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.emnlp-main.319",
    doi = "10.18653/v1/2023.emnlp-main.319",
    pages = "5254--5276",
    abstract = "The success of large language models (LLMs), like GPT-4 and ChatGPT, has led to the development of numerous cost-effective and accessible alternatives that are created by finetuning open-access LLMs with task-specific data (e.g., ChatDoctor) or instruction data (e.g., Alpaca). Among the various fine-tuning methods, adapter-based parameter-efficient fine-tuning (PEFT) is undoubtedly one of the most attractive topics, as it only requires fine-tuning a few external parameters instead of the entire LLMs while achieving comparable or even better performance. To enable further research on PEFT methods of LLMs, this paper presents LLM-Adapters, an easy-to-use framework that integrates various adapters into LLMs and can execute these adapter-based PEFT methods of LLMs for different tasks. The framework includes state-of-the-art open-access LLMs such as LLaMA, BLOOM, and GPT-J, as well as widely used adapters such as Series adapters, Parallel adapter, Prompt-based learning and Reparametrization-based methods. Moreover, we conduct extensive empirical studies on the impact of adapter types, placement locations, and hyper-parameters to the best design for each adapter-based methods. We evaluate the effectiveness of the adapters on fourteen datasets from two different reasoning tasks, Arithmetic Reasoning and Commonsense Reasoning. The results demonstrate that using adapter-based PEFT in smaller-scale LLMs (7B) with few extra trainable parameters yields comparable, and in some cases superior, performance to powerful LLMs (175B) in zero-shot inference on simple math reasoning datasets.",
}
<?xml version="1.0" encoding="UTF-8"?> <modsCollection xmlns="http://www.loc.gov/mods/v3"> <mods ID="hu-etal-2023-llm"> <titleInfo> <title>LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models</title> </titleInfo> <name type="personal"> <namePart type="given">Zhiqiang</namePart> <namePart type="family">Hu</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Lei</namePart> <namePart type="family">Wang</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Yihuai</namePart> <namePart type="family">Lan</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Wanyu</namePart> <namePart type="family">Xu</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Ee-Peng</namePart> <namePart type="family">Lim</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Lidong</namePart> <namePart type="family">Bing</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Xing</namePart> <namePart type="family">Xu</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Soujanya</namePart> <namePart type="family">Poria</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Roy</namePart> <namePart type="family">Lee</namePart> <role> <roleTerm authority="marcrelator" type="text">author</roleTerm> </role> </name> <originInfo> <dateIssued>2023-12</dateIssued> </originInfo> <typeOfResource>text</typeOfResource> <relatedItem type="host"> <titleInfo> <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title> </titleInfo> <name type="personal"> <namePart type="given">Houda</namePart> <namePart type="family">Bouamor</namePart> <role> <roleTerm authority="marcrelator" type="text">editor</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Juan</namePart> <namePart type="family">Pino</namePart> <role> <roleTerm authority="marcrelator" type="text">editor</roleTerm> </role> </name> <name type="personal"> <namePart type="given">Kalika</namePart> <namePart type="family">Bali</namePart> <role> <roleTerm authority="marcrelator" type="text">editor</roleTerm> </role> </name> <originInfo> <publisher>Association for Computational Linguistics</publisher> <place> <placeTerm type="text">Singapore</placeTerm> </place> </originInfo> <genre authority="marcgt">conference publication</genre> </relatedItem> <abstract>The success of large language models (LLMs), like GPT-4 and ChatGPT, has led to the development of numerous cost-effective and accessible alternatives that are created by finetuning open-access LLMs with task-specific data (e.g., ChatDoctor) or instruction data (e.g., Alpaca). 
Among the various fine-tuning methods, adapter-based parameter-efficient fine-tuning (PEFT) is undoubtedly one of the most attractive topics, as it only requires fine-tuning a few external parameters instead of the entire LLMs while achieving comparable or even better performance. To enable further research on PEFT methods of LLMs, this paper presents LLM-Adapters, an easy-to-use framework that integrates various adapters into LLMs and can execute these adapter-based PEFT methods of LLMs for different tasks. The framework includes state-of-the-art open-access LLMs such as LLaMA, BLOOM, and GPT-J, as well as widely used adapters such as Series adapters, Parallel adapter, Prompt-based learning and Reparametrization-based methods. Moreover, we conduct extensive empirical studies on the impact of adapter types, placement locations, and hyper-parameters to the best design for each adapter-based methods. We evaluate the effectiveness of the adapters on fourteen datasets from two different reasoning tasks, Arithmetic Reasoning and Commonsense Reasoning. The results demonstrate that using adapter-based PEFT in smaller-scale LLMs (7B) with few extra trainable parameters yields comparable, and in some cases superior, performance to powerful LLMs (175B) in zero-shot inference on simple math reasoning datasets.</abstract> <identifier type="citekey">hu-etal-2023-llm</identifier> <identifier type="doi">10.18653/v1/2023.emnlp-main.319</identifier> <location> <url>https://aclanthology.org/2023.emnlp-main.319</url> </location> <part> <date>2023-12</date> <extent unit="page"> <start>5254</start> <end>5276</end> </extent> </part> </mods> </modsCollection>
%0 Conference Proceedings
%T LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models
%A Hu, Zhiqiang
%A Wang, Lei
%A Lan, Yihuai
%A Xu, Wanyu
%A Lim, Ee-Peng
%A Bing, Lidong
%A Xu, Xing
%A Poria, Soujanya
%A Lee, Roy
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F hu-etal-2023-llm
%X The success of large language models (LLMs), like GPT-4 and ChatGPT, has led to the development of numerous cost-effective and accessible alternatives that are created by finetuning open-access LLMs with task-specific data (e.g., ChatDoctor) or instruction data (e.g., Alpaca). Among the various fine-tuning methods, adapter-based parameter-efficient fine-tuning (PEFT) is undoubtedly one of the most attractive topics, as it only requires fine-tuning a few external parameters instead of the entire LLMs while achieving comparable or even better performance. To enable further research on PEFT methods of LLMs, this paper presents LLM-Adapters, an easy-to-use framework that integrates various adapters into LLMs and can execute these adapter-based PEFT methods of LLMs for different tasks. The framework includes state-of-the-art open-access LLMs such as LLaMA, BLOOM, and GPT-J, as well as widely used adapters such as Series adapters, Parallel adapter, Prompt-based learning and Reparametrization-based methods. Moreover, we conduct extensive empirical studies on the impact of adapter types, placement locations, and hyper-parameters to the best design for each adapter-based methods. We evaluate the effectiveness of the adapters on fourteen datasets from two different reasoning tasks, Arithmetic Reasoning and Commonsense Reasoning. The results demonstrate that using adapter-based PEFT in smaller-scale LLMs (7B) with few extra trainable parameters yields comparable, and in some cases superior, performance to powerful LLMs (175B) in zero-shot inference on simple math reasoning datasets.
%R 10.18653/v1/2023.emnlp-main.319
%U https://aclanthology.org/2023.emnlp-main.319
%U https://doi.org/10.18653/v1/2023.emnlp-main.319
%P 5254-5276
Markdown (Informal)
[LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models](https://aclanthology.org/2023.emnlp-main.319) (Hu et al., EMNLP 2023)
ACL
Zhiqiang Hu, Lei Wang, Yihuai Lan, Wanyu Xu, Ee-Peng Lim, Lidong Bing, Xing Xu, Soujanya Poria, and Roy Lee. 2023. [LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models](https://aclanthology.org/2023.emnlp-main.319). In *Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing*, pages 5254–5276, Singapore. Association for Computational Linguistics.
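
For readers who want a concrete picture of the adapter family the abstract describes, below is a minimal, illustrative PyTorch sketch of a bottleneck adapter and its "Series" vs. "Parallel" placement around a frozen sublayer. This is not the LLM-Adapters implementation; the class names, bottleneck size, activation, and wrapping scheme are hypothetical choices made for the example.

```python
# Illustrative sketch only (not the LLM-Adapters codebase): a minimal
# bottleneck adapter and a wrapper showing series vs. parallel placement.
# All names and sizes here are assumptions for illustration.
import torch
import torch.nn as nn


class BottleneckAdapter(nn.Module):
    """Down-project -> nonlinearity -> up-project; returns only the adapter delta."""

    def __init__(self, hidden_size: int, bottleneck_size: int = 64):
        super().__init__()
        self.down = nn.Linear(hidden_size, bottleneck_size)
        self.up = nn.Linear(bottleneck_size, hidden_size)
        self.act = nn.GELU()
        nn.init.zeros_(self.up.weight)  # near-identity start: the frozen model's
        nn.init.zeros_(self.up.bias)    # behavior is unchanged before training

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.up(self.act(self.down(x)))


class AdaptedSublayer(nn.Module):
    """Wraps a frozen sublayer (e.g., an MLP block) with a series or parallel adapter."""

    def __init__(self, sublayer: nn.Module, hidden_size: int, parallel: bool = False):
        super().__init__()
        self.sublayer = sublayer
        self.adapter = BottleneckAdapter(hidden_size)
        self.parallel = parallel
        for p in self.sublayer.parameters():
            p.requires_grad_(False)  # only the small adapter is trained

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.parallel:
            # Parallel adapter: the adapter branch reads the sublayer *input*.
            return self.sublayer(x) + self.adapter(x)
        # Series adapter: the adapter is applied to the sublayer *output*.
        h = self.sublayer(x)
        return h + self.adapter(h)


if __name__ == "__main__":
    block = AdaptedSublayer(nn.Linear(768, 768), hidden_size=768)
    out = block(torch.randn(2, 16, 768))
    trainable = sum(p.numel() for p in block.parameters() if p.requires_grad)
    print(out.shape, trainable)  # only ~0.1M adapter parameters are trainable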