gutalk_st / app.py
blackwingedkite's picture
Update app.py
f4348d4
raw
history blame contribute delete
No virus
4.1 kB
import streamlit as st
import torch
import transformers
from transformers import pipeline
from transformers import LlamaTokenizer, LlamaForCausalLM
import time
import csv
import locale
def _utf8_preferred_encoding(do_setlocale=True):
    """Report UTF-8 as the preferred text encoding, whatever the host locale is.

    The parameter mirrors the signature of the standard
    ``locale.getpreferredencoding(do_setlocale=True)`` so that callers which
    pass an argument (commonly ``getpreferredencoding(False)``) keep working
    after the override; the value itself is ignored.
    """
    return "UTF-8"


# Override the locale lookup so code that asks for the preferred encoding
# always gets UTF-8 (the prompts and CSV log in this app contain Chinese text).
locale.getpreferredencoding = _utf8_preferred_encoding
#https://huggingface.co/shibing624/chinese-alpaca-plus-7b-hf
#https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
#https://huggingface.co/minlik/chinese-alpaca-plus-7b-merged
def generate_prompt(text: str) -> str:
    """Wrap *text* in the Alpaca-style instruction/response prompt.

    Args:
        text: The instruction to place under the ``### Instruction:`` header.

    Returns:
        The full prompt, ending with the ``### Response:`` header so the
        model continues with its answer.
    """
    # NOTE(review): sections are separated by single newlines here; if the
    # model was tuned on a template with blank lines between sections, confirm
    # this matches before changing it.
    return (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n"
        "### Instruction:\n"
        f"{text}\n"
        "### Response:"
    )
MODEL_NAME = 'shibing624/chinese-alpaca-plus-7b-hf'


@st.cache_resource
def _load_model():
    """Load the tokenizer and the text-generation pipeline a single time.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects as a shared resource avoids re-instantiating the model on
    each rerun.

    Returns:
        A ``(tokenizer, generator)`` tuple for ``MODEL_NAME``.
    """
    llama_tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
    # Call the factory through the ``transformers`` module: the module-level
    # name ``pipeline`` is rebound below to the pipeline *instance*.
    generator = transformers.pipeline(
        "text-generation",
        model=MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="auto",
    )
    return llama_tokenizer, generator


# The rest of the script refers to these two module-level names.
tokenizer, pipeline = _load_model()
# Instruction used when the input starts with "." (stock-analysis request).
# ``{stockname}`` and ``{analysis}`` are filled in from the user's input.
STOCK_PROMPT_TEMPLATE = """請以肯定和專業的語氣,一步一步的思考並回答以下關於{stockname}的問題,避免空洞的答覆:
- 請回答關於{stockname}的問題,請總結給予的資料以及資料解釋,並整合出金融上的洞見。\n
- 請不要生成任何資料沒有提供的數據,即便你已知道。\n
- 請假裝這些資料都是你預先知道的知識。因此,請不要提到「根據資料」、「基於上述資料」等回答
- 請不要說「好的、我明白了、根據我的要求、以下是我的答案」等贅詞,請輸出分析結果即可\n
- 請寫300字到500字之間,若合適,可以進行分類、列點
資料:{stockname}{analysis}
請特別注意,分析結果包含籌碼面、基本面以及技術面,請針對這三個面向進行回答,並且特別注意個別符合幾項和不符合幾項。籌碼面、技術面和基本面滿分十分,總計滿分為30分。
三個面向中,符合5項以上代表該面項表現好,反之是該面項表現差。
"""

# Append-only log; columns are: stockname, prompt, answer, time.
LOG_PATH = 'alpaca_output.csv'
# Value logged in the stockname column for free-form (non-stock) questions.
NO_STOCK_LABEL = "無"
# Budget for *generated* tokens only. The previous ``max_length=200`` also
# counted the prompt, and the stock-analysis instruction alone exceeds it.
# Raise this if answers are cut short.
MAX_NEW_TOKENS = 200


def _parse_request(user_input):
    """Split the raw input into ``(stockname, instruction)``.

    Input starting with "." is a stock-analysis request: the three characters
    after the dot are the stock name and the remainder is the analysis data,
    both of which are inserted into ``STOCK_PROMPT_TEMPLATE``. Any other
    input is used as the instruction unchanged.
    """
    if user_input.startswith("."):
        stockname = user_input[1:4]
        analysis = user_input[4:]
        instruction = STOCK_PROMPT_TEMPLATE.format(
            stockname=stockname, analysis=analysis
        )
        return stockname, instruction
    return NO_STOCK_LABEL, user_input


def _generate(instruction):
    """Run the model on *instruction*; return ``(sequences, elapsed_seconds)``."""
    prompt = generate_prompt(instruction)
    start = time.time()
    sequences = pipeline(
        prompt,
        do_sample=True,
        top_k=40,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=MAX_NEW_TOKENS,
    )
    return sequences, time.time() - start


st.title("Chinese text generation alpaca2")
st.write("Enter a sentence and alpaca2 will answer:")
user_input = st.text_input("")

if user_input:
    stockname, instruction = _parse_request(user_input)
    sequences, elapsed = _generate(instruction)
    timing = f"time: {elapsed:.2f}"

    for seq in sequences:
        # The whole pipeline record is displayed; the text alone would be
        # seq['generated_text'].
        st.write(f"Result: {seq}")
    st.write(timing)

    # Open the log only when there is a row to append, so reruns without
    # input do not touch the file.
    with open(LOG_PATH, 'a', newline='', encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerow([stockname, instruction, sequences, timing])