import streamlit as lit
import torch
from transformers import BartForConditionalGeneration, PreTrainedTokenizerFast


@lit.cache(allow_output_mutation=True)
def loadModels():
    """Load the "rycont/biblify" BART model and its tokenizer.

    Wrapped in Streamlit's cache decorator so the load is not repeated on
    every script rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    repository = "rycont/biblify"
    _model = BartForConditionalGeneration.from_pretrained(repository)
    _tokenizer = PreTrainedTokenizerFast.from_pretrained(repository)
    print("Loaded :)")
    return _model, _tokenizer


lit.title("성경말투 생성기")
lit.caption("한 문장을 가장 잘 변환합니다. 제대로 동작하지 않다면 아래 링크로 이동해주세요")
lit.caption("https://main-biblify-space-rycont.endpoint.ainize.ai/")

model, tokenizer = loadModels()

# Used both as the padded input length and as the generation length limit.
MAX_LENGTH = 128


def biblifyWithBeams(beam, tokens, attention_mask):
    """Generate one converted sentence using beam search.

    Args:
        beam: Number of beams passed to ``model.generate``.
        tokens: Flat list of input token ids (a single sequence).
        attention_mask: Flat list of 0/1 flags, same length as ``tokens``.

    Returns:
        The decoded text of the first generated sequence.
    """
    generated = model.generate(
        # Build integer tensors directly; a batch of one sequence each.
        input_ids=torch.tensor([tokens], dtype=torch.int64),
        # Use the argument, not a module-level global, so the function does
        # not depend on state set up by its caller.
        attention_mask=torch.tensor([attention_mask], dtype=torch.int64),
        num_beams=beam,
        max_length=MAX_LENGTH,
        eos_token_id=tokenizer.eos_token_id,
        # Forbid the unknown token in the output.
        bad_words_ids=[[tokenizer.unk_token_id]],
    )[0]

    # NOTE(review): both replace() calls have empty search strings and are
    # therefore no-ops. Presumably they were meant to strip sequence markers
    # that were lost from these literals - confirm against the model's
    # special tokens before changing them.
    return tokenizer.decode(
        generated,
    ).replace('', '').replace('', '')


with lit.form("gen"):
    text_input = lit.text_input("문장 입력")
    # The button's return value is not consulted below; processing is gated
    # on the text being non-blank instead.
    submitted = lit.form_submit_button("생성")

if text_input.strip():
    print(text_input)

    # NOTE(review): concatenating empty strings is a no-op. Presumably start
    # and end markers were meant to wrap the sentence here - confirm.
    text_input = "" + text_input + ""

    tokens = tokenizer.encode(text_input)
    tokenLength = len(tokens)

    # Right-pad ids and mask up to MAX_LENGTH. A negative pad count yields an
    # empty list, so longer inputs are passed through unpadded.
    padding = MAX_LENGTH - tokenLength
    attentionMasks = [1] * tokenLength + [0] * padding
    tokens = tokens + [tokenizer.pad_token_id] * padding

    # Try beam widths 6 through 10 and show each distinct output once.
    beam_widths = range(6, 11)
    results = []

    for beam in beam_widths:
        generated = biblifyWithBeams(beam, tokens, attentionMasks)

        if generated in results:
            print("중복됨")
            continue

        results.append(generated)

        with lit.expander(f"{len(results)}번째 결과 ({beam})", True):
            lit.write(generated)
        print(generated)

    # Report how many beam widths produced an already-shown result.
    lit.caption(f"및 {len(beam_widths) - len(results)} 개의 중복된 결과")