Llama 3 portuguese Tom cat 8b instruct GGUF

This model was trained on a superset of 300,000 chats in Portuguese. It helps fill the gap in Portuguese-language models. Tuned from the Tom cat 8b instruct, the model was adjusted mainly for chat.

!git lfs install
!pip install langchain
!pip install langchain-community langchain-core
!pip install llama-cpp-python

!git clone https://huggingface.co/rhaymison/Llama-3-portuguese-Tom-cat-8b-instruct-q8-gguf/

def llamacpp():
    """Run a sample Portuguese prompt through the cloned GGUF model.

    Loads the GGUF weights with LangChain's ``LlamaCpp`` wrapper, wraps one
    hard-coded question in a chat prompt built from the
    ``<|start_header_id|>`` / ``<|eot_id|>`` special tokens, runs it through
    an ``LLMChain`` and prints the generated answer.

    Returns:
        None. The generated text is written to stdout.

    Raises:
        FileNotFoundError: If the model location is a directory that
            contains no ``.gguf`` file.
    """
    from pathlib import Path

    from langchain.llms import LlamaCpp
    from langchain.prompts import PromptTemplate
    from langchain.chains import LLMChain

    # The clone step produces a repository *directory*, while llama.cpp needs
    # the path of a single model file. Resolve the .gguf file inside the
    # directory; a path that already points at a file is used unchanged.
    model_location = Path("/content/Llama-3-portuguese-Tom-cat-8b-instruct-q8-gguf")
    if model_location.is_dir():
        gguf_files = sorted(model_location.glob("*.gguf"))
        if not gguf_files:
            raise FileNotFoundError(
                f"No .gguf model file found in {model_location}"
            )
        model_location = gguf_files[0]

    llm = LlamaCpp(
        model_path=str(model_location),
        n_gpu_layers=40,
        n_batch=512,
        verbose=True,
    )

    # Deliberately a plain string, not an f-string: ``{question}`` must stay a
    # placeholder for PromptTemplate to fill in at run time. As an f-string it
    # referenced ``question`` before assignment and raised UnboundLocalError.
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    Abaixo está uma instrução que descreve uma tarefa, juntamente com uma entrada que fornece mais contexto. Escreva uma resposta que complete adequadamente o pedido.<|eot_id|><|start_header_id|>user<|end_header_id|>
    {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    prompt = PromptTemplate(template=template, input_variables=["question"])

    llm_chain = LLMChain(prompt=prompt, llm=llm)

    question = "instrução: aja como um professor de matemática e me explique porque 2 + 2 = 4?"
    response = llm_chain.run({"question": question})
    print(response)