Edit model card

GGUF версия: https://huggingface.co/pirbis/Vikhr-7B-instruct_0.2-GGUF

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch
import os
os.environ['HF_HOME']='.'
MODEL_NAME = "Vikhrmodels/Vikhr-7B-instruct_0.2"
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>\n"
DEFAULT_SYSTEM_PROMPT = "Ты — Вихрь, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."

class Conversation:
    def __init__(
        self,
        message_template=DEFAULT_MESSAGE_TEMPLATE,
        system_prompt=DEFAULT_SYSTEM_PROMPT,
    ):
        self.message_template = message_template
        self.messages = [{
            "role": "system",
            "content": system_prompt
        }]

    def add_user_message(self, message):
        self.messages.append({
            "role": "user",
            "content": message
        })

    def get_prompt(self, tokenizer):
        final_text = ""
        for message in self.messages:
            message_text = self.message_template.format(**message)
            final_text += message_text
        final_text += 'bot'
        return final_text.strip()


def generate(model, tokenizer, prompt, generation_config):
    data = tokenizer(prompt, return_tensors="pt")
    data = {k: v.to(model.device) for k, v in data.items()}
    output_ids = model.generate(
        **data,
        generation_config=generation_config
    )[0]
    output_ids = output_ids[len(data["input_ids"][0]):]
    output = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output.strip()

#config = PeftConfig.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto"
)
#model = PeftModel.from_pretrained(    model,   MODEL_NAME,   torch_dtype=torch.float16)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_length=256
generation_config.top_p=0.9
generation_config.top_k=30
generation_config.do_sample = True
print(generation_config)

inputs = ["Как тебя зовут?", "Кто такой Колмогоров?"]

for inp in inputs:
    conversation = Conversation()
    conversation.add_user_message(inp)
    prompt = conversation.get_prompt(tokenizer)

    output = generate(model, tokenizer, prompt, generation_config)
    print(inp)
    print(output)
    print('\n')

wandb

Downloads last month
173
Safetensors
Model size
6.97B params
Tensor type
FP16
·
Inference API
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Datasets used to train Vikhrmodels/Vikhr-7B-instruct_0.2