I am using the function below to print the number of trainable parameters, and I am getting this output:
**trainable params: 262410240 || all params: 7241732096 || trainable%: 3.6235839233122604**
def print_trainable_parameters(model) -> None:
    """Print the trainable and total parameter counts of ``model``.

    Walks ``model.named_parameters()`` and adds up ``param.numel()`` for every
    parameter, and separately for those whose ``requires_grad`` is true. The
    name of each trainable parameter is printed as it is encountered, followed
    by a single summary line.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.
    """
    trainable_params = 0
    all_param = 0
    for param_name, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
            # NOTE(review): the pasted snippet lost its indentation; this
            # print is assumed to belong to the requires_grad branch (listing
            # only trainable parameters) -- confirm against the original.
            print(param_name)

    # Guard the percentage: a model with no parameters would otherwise raise
    # ZeroDivisionError instead of printing a summary.
    percent = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percent}"
    )
# Handle for the first CUDA GPU. It is not referenced again in the visible
# snippet; the from_pretrained call below does not receive it.
device = torch.device("cuda:0")

# Load the Mistral causal-LM checkpoint from model_path, requesting float16
# for torch_dtype together with 8-bit loading.
# NOTE(review): with load_in_8bit=True the quantized linear weights are
# presumably stored as int8 and therefore cannot have requires_grad=True, so
# only the remaining half-precision tensors (embeddings, norms, lm_head) would
# be counted as "trainable" -- confirm against the printed parameter names.
model = MistralForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    load_in_8bit=True,
    trust_remote_code=True,
)
````