talk-to-me

Sleeping

Braddy committed on Jan 30

Commit

1ccdf9c

•

1 Parent(s): 17be8c2

include deepsparse

Files changed (2) hide show

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import LlamaCpp
 from langchain_experimental.chat_models import Llama2Chat
 from langchain.prompts.chat import (
@@ -35,6 +36,13 @@ llm = LlamaCpp(
     temperature=0.75,
     max_tokens=64
 )
 model = Llama2Chat(llm=llm)
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import LlamaCpp
+from langchain.llms import DeepSparse
 from langchain_experimental.chat_models import Llama2Chat
 from langchain.prompts.chat import (
     temperature=0.75,
     max_tokens=64
 )
+llm = DeepSparse(
+    model="zoo:llama2-7b-llama2_chat_llama2_pretrain-base_quantized",
+    model_config={"sequence_length": 2048},
+    stop=["<|im_end|>", "<|endoftext|>"]
+)
 model = Llama2Chat(llm=llm)
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ langchain_community
 langchain_experimental
 llama-cpp-python
 unstructured
-unstructured[local-inference]

 langchain_experimental
 llama-cpp-python
 unstructured
+unstructured[local-inference]
+deepsparse-nightly[llm]