"""
File: model_llm.py
Description: Load a Large Language Model (LLM)
Author: Didier Guillevic
Date: 2024-03-16
"""

import os

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TextIteratorStreamer

# Hugging Face Hub identifier of the checkpoint to load.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Hub access token read from the environment; None when HF_TOKEN is unset.
auth_token = os.environ.get("HF_TOKEN")

# Auto-regressive model for language completion: pad on the left so that
# generated tokens directly follow the last real token of each prompt.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side="left",
    # `token` replaces the deprecated `use_auth_token` keyword.
    token=auth_token,
)

# Load the causal LM weights in half precision (float16).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    # `token` replaces the deprecated `use_auth_token` keyword.
    token=auth_token,
)

# Rebind `model` to the torch.compile-wrapped module.
model = torch.compile(model)