Is anything wrong in the above code?

#15
by vpkprasanna - opened
  import torch
  from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
  from typing import Iterator


  model_id = "codellama/CodeLlama-13b-Instruct-hf"
  DEFAULT_SYSTEM_PROMPT = """\
  You are a helpful, respectful and honest assistant with a deep knowledge of code and software design. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, 
  unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, 
  explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\
  """
  MAX_MAX_NEW_TOKENS = 4096
  DEFAULT_MAX_NEW_TOKENS = 1024
  MAX_INPUT_TOKEN_LENGTH = 4000


  if torch.cuda.is_available():
      config = AutoConfig.from_pretrained(model_id)
      # config.pretraining_tp = 1
      model = AutoModelForCausalLM.from_pretrained(
          model_id,
          config=config,
          # torch_dtype=torch.float16,
          # load_in_4bit=True,
          device_map='auto',
          use_safetensors=True,
      )
  else:
      model = None
  tokenizer = AutoTokenizer.from_pretrained(model_id)


  def get_prompt(message: str, chat_history: list[tuple[str, str]],
                 system_prompt: str) -> str:
      texts = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
      # The first user input is _not_ stripped
      do_strip = False
      for user_input, response in chat_history:
          user_input = user_input.strip() if do_strip else user_input
          do_strip = True
          texts.append(f'{user_input} [/INST] {response.strip()} </s><s>[INST] ')
      message = message.strip() if do_strip else message
      texts.append(f'{message} [/INST]')
      return ''.join(texts)


  def get_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> int:
      prompt = get_prompt(message, chat_history, system_prompt)
      input_ids = tokenizer([prompt], return_tensors='np', add_special_tokens=False)['input_ids']
      return input_ids.shape[-1]
  
  def run(message: str,
          chat_history: list[tuple[str, str]],
          system_prompt: str,
          max_new_tokens: int = 1024,
          temperature: float = 0.1,
          top_p: float = 0.9,
          top_k: int = 50) -> Iterator[str]:
      prompt = get_prompt(message, chat_history, system_prompt)
      inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')

      generate_kwargs = dict(
          inputs,
          # streamer=streamer,
          max_new_tokens=max_new_tokens,
          do_sample=True,
          top_p=top_p,
          top_k=top_k,
          temperature=temperature,
          num_beams=1,
      )
      result = model.generate(**generate_kwargs)
      # batch_decode returns a list of decoded strings (one per generated sequence)
      result = tokenizer.batch_decode(result)
      yield result

  def generate(
      message: str,
      history_with_input: list[tuple[str, str]],
      system_prompt: str,
      max_new_tokens: int,
      temperature: float,
      top_p: float,
      top_k: int,
  ) -> Iterator[list[tuple[str, str]]]:
      if max_new_tokens > MAX_MAX_NEW_TOKENS:
          raise ValueError(f'max_new_tokens must be <= {MAX_MAX_NEW_TOKENS}')

      history = history_with_input[:-1]
      generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
      try:
          first_response = next(generator)
          yield first_response
      except StopIteration:
          # Raising a bare string is invalid in Python 3; raise an exception instance instead
          raise RuntimeError('Something went wrong')
      # for response in generator:
      #     yield history + [(message, response)]



  def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
      generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 1, 0.95, 50)
      for x in generator:
          pass
      return '', x

  response_value = process_example(message="create an HTML which has 4 fields to get employee information such as name , age , phone number and sex along with validation ")
  print(response_value[1])

  print(type(response_value[1]))