Spaces:

BEE-spoke-data
/

beecoder-playground

Sleeping

App Files Files Community

beecoder-playground / app.py

pszemraj

✏️

7e75f34 11 months ago

raw

history blame contribute delete

No virus

7.68 kB

	import gradio as gr
	import torch
	from gradio.themes.utils import sizes
	from transformers import AutoModelForCausalLM, AutoTokenizer

	import utils
	from constants import END_OF_TEXT, MIN_TEMPERATURE

	# Tokenizer and model are both loaded from the same Hub checkpoint.
	_checkpoint = "BEE-spoke-data/smol_llama-101M-GQA-python"

	tokenizer = AutoTokenizer.from_pretrained(_checkpoint, use_fast=False)
	# Order matters here: the pad id is first aliased to the EOS id, then the
	# pad token string itself is set to END_OF_TEXT.
	tokenizer.pad_token_id = tokenizer.eos_token_id
	tokenizer.pad_token = END_OF_TEXT

	# Load the causal LM and wrap it with torch.compile in one step.
	model = torch.compile(
	    AutoModelForCausalLM.from_pretrained(_checkpoint, device_map="auto"),
	    mode="reduce-overhead",
	)

	# UI assets: stylesheet, README-derived text sections, and the Gradio theme.

	_styles = utils.get_file_as_string("styles.css")

	# Read ./README.md and split it on "---" into (up to) five sections.
	readme_file_content = utils.get_file_as_string("README.md", path="./")
	_readme_sections = utils.get_sections(readme_file_content, "---", up_to=5)
	manifest, description, disclaimer, base_model_info, formats = _readme_sections

	# Font fallback chain: the Google font first, then generic sans-serif families.
	_font_stack = [
	    gr.themes.GoogleFont("IBM Plex Sans", [400, 600]),
	    "ui-sans-serif",
	    "system-ui",
	    "sans-serif",
	]

	theme = gr.themes.Soft(
	    primary_hue="yellow",
	    secondary_hue="orange",
	    neutral_hue="slate",
	    radius_size=sizes.radius_sm,
	    font=_font_stack,
	    text_size=sizes.text_lg,
	)


	def run_inference(
	    prompt, temperature, max_new_tokens, top_p, repetition_penalty
	) -> str:
	    """Sample a completion for ``prompt`` from the module-level model.

	    Args:
	        prompt: Text to tokenize and feed to the model.
	        temperature: Sampling temperature; anything below
	            ``MIN_TEMPERATURE`` is raised to that floor.
	        max_new_tokens: Upper bound on the number of generated tokens.
	            May arrive as a float (Gradio sliders pass floats) and is
	            converted to ``int`` before generation.
	        top_p: Nucleus-sampling probability mass.
	        repetition_penalty: Penalty applied to repeated tokens.

	    Returns:
	        The decoded text of the first generated sequence, with special
	        tokens stripped.
	    """
	    # UI sliders can hand over floats (e.g. 64.0); generation expects a
	    # whole-number token budget.
	    token_budget = int(max_new_tokens)

	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	    outputs = model.generate(
	        **inputs,
	        do_sample=True,
	        epsilon_cutoff=1e-3,
	        max_new_tokens=token_budget,
	        min_new_tokens=2,
	        no_repeat_ngram_size=6,
	        renormalize_logits=True,
	        repetition_penalty=repetition_penalty,
	        # Clamp from below so the UI minimum of 0.0 never reaches the sampler.
	        temperature=max(temperature, MIN_TEMPERATURE),
	        top_p=top_p,
	    )
	    text = tokenizer.batch_decode(
	        outputs,
	        skip_special_tokens=True,
	    )[0]
	    return text


	# Prompts offered in the UI's example gallery.
	_example_prompts = (
	    'def greet(name: str) -> None:\n """\n Greets the user\n """\n print(f"Hello,',
	    'for i in range(5):\n """\n Loop through 0 to 4\n """\n print(i,',
	    'x = 10\n"""Check if x is greater than 5"""\nif x > 5:',
	    "def square(x: int) -> int:\n return",
	    'import math\n"""Math operations"""\nmath.',
	    'def is_even(n) -> bool:\n """\n Check if a number is even\n """\n if n % 2 == 0:',
	    'while True:\n """Infinite loop example"""\n print("Infinite loop,',
	    "def sum_list(lst: list[int]) -> int:\n total = 0\n for item in lst:",
	    'try:\n """\n Exception handling\n """\n x = int(input("Enter a number: "))\nexcept ValueError:',
	    'def divide(a: float, b: float) -> float:\n """\n Divide a by b\n """\n if b != 0:',
	)

	# Every example shares the same sampling settings, in the order
	# (temperature, max_new_tokens, top_p, repetition_penalty).
	_example_settings = (0.2, 64, 0.9, 1.2)

	# One fresh row per prompt: [prompt, temperature, max_new_tokens, top_p, repetition_penalty]
	examples = [[prompt, *_example_settings] for prompt in _example_prompts]


	# Define the Gradio Blocks interface.
	# Layout: description on top, then the code input / generate button / output,
	# an "Advanced settings" accordion with the four sampling sliders next to a
	# model-version dropdown, followed by the disclaimer, examples and README text.
	with gr.Blocks(theme=theme, analytics_enabled=False, css=_styles) as demo:
	    with gr.Column():
	        gr.Markdown(description)
	        with gr.Row():
	            with gr.Column():
	                instruction = gr.Textbox(
	                    value=examples[0][0],
	                    placeholder="Enter your code here",
	                    label="Code",
	                    elem_id="q-input",
	                )
	                submit = gr.Button("Generate", variant="primary")
	                output = gr.Code(elem_id="q-output", language="python", lines=10)
	                with gr.Row():
	                    with gr.Column():
	                        with gr.Accordion("Advanced settings", open=False):
	                            with gr.Row():
	                                column_1, column_2 = gr.Column(), gr.Column()
	                                with column_1:
	                                    temperature = gr.Slider(
	                                        label="Temperature",
	                                        value=0.2,
	                                        minimum=0.0,
	                                        maximum=1.0,
	                                        step=0.05,
	                                        interactive=True,
	                                        info="Higher values produce more diverse outputs",
	                                    )
	                                    max_new_tokens = gr.Slider(
	                                        label="Max new tokens",
	                                        value=64,
	                                        minimum=32,
	                                        maximum=512,
	                                        step=32,
	                                        interactive=True,
	                                        info="Number of tokens to generate",
	                                    )
	                                with column_2:
	                                    top_p = gr.Slider(
	                                        label="Top-p (nucleus sampling)",
	                                        value=0.90,
	                                        minimum=0.0,
	                                        maximum=1,
	                                        step=0.05,
	                                        interactive=True,
	                                        info="Higher values sample more low-probability tokens",
	                                    )
	                                    repetition_penalty = gr.Slider(
	                                        label="Repetition penalty",
	                                        value=1.2,
	                                        minimum=1.0,
	                                        maximum=2.0,
	                                        step=0.05,
	                                        interactive=True,
	                                        info="Penalize repeated tokens",
	                                    )
	                    with gr.Column():
	                        # Display-only selector: a single model is available and
	                        # the value is not passed to run_inference.
	                        version = gr.Dropdown(
	                            [
	                                "smol_llama-101M-GQA-python",
	                            ],
	                            value="smol_llama-101M-GQA-python",
	                            label="Version",
	                            info="",
	                        )
	                gr.Markdown(disclaimer)
	                # The input components mirror both the five-value example rows
	                # and run_inference's five parameters, in the same order.
	                gr.Examples(
	                    examples=examples,
	                    inputs=[
	                        instruction,
	                        temperature,
	                        max_new_tokens,
	                        top_p,
	                        repetition_penalty,
	                    ],
	                    cache_examples=False,
	                    fn=run_inference,
	                    outputs=[output],
	                )
	                gr.Markdown(base_model_info)
	                gr.Markdown(formats)

	    # Wire the button: component values are passed positionally to run_inference.
	    submit.click(
	        run_inference,
	        inputs=[
	            instruction,
	            temperature,
	            max_new_tokens,
	            top_p,
	            repetition_penalty,
	        ],
	        outputs=[output],
	        show_progress=True,
	    )

	# Request queueing is left disabled: .queue(max_size=10, api_open=False)
	# `share` is switched on only when utils.is_google_colab() reports true.
	_share_publicly = utils.is_google_colab()
	demo.launch(debug=True, show_api=False, share=_share_publicly)