---
license: apache-2.0
language:
- fa
---

### Installing Libraries

Make sure these libraries are installed correctly.

* `pip install -q sentencepiece`
* `pip install -q transformers`
* `pip install -q accelerate`
* `pip install --upgrade -q bitsandbytes`

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint, so the tokenizer and the model
# can never be loaded from two different repositories by accident.
model_path = "Neurai/llama7b"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # load_in_8bit=True,  # optional: uncomment for 8-bit loading (uses bitsandbytes)
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model.eval()
print('model loaded')

# Example Persian prompt ("How many years does a giraffe live?").
SYS_PROMPT = "زرافه چند سال عمر میکند؟"


def response_generate(input_prompt):
    """Generate a sampled completion for ``input_prompt``.

    Args:
        input_prompt: Prompt text to tokenize and feed to the model.

    Returns:
        The first generated sequence decoded to text, with special tokens
        skipped. For a causal LM the output of ``generate`` normally starts
        with the prompt tokens, so the returned text usually begins with the
        prompt itself.
    """
    # Put the inputs wherever `device_map="auto"` placed the model instead of
    # assuming a CUDA device exists.
    encoded = tokenizer(input_prompt, return_tensors="pt").to(model.device)

    # Some tokenizers define no pad token; fall back to EOS so `generate`
    # always receives a valid padding id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        inputs=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        do_sample=True,
        temperature=0.3,
        top_k=50,
        top_p=0.9,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )
    # Only one prompt is submitted, so the batch holds a single sequence.
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return response


print(response_generate(SYS_PROMPT))
```