|
|
|
"""LLM Training Cost Calculator App.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1iZpCUgC5T_ASnlDgMYm1n4RH8BZsm7sx |
|
""" |
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
def estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):

    """
    Estimates the training cost of a large language model based on the selected GPU and precision.

    Uses the standard approximation total FLOPs ~= 6 * parameters * tokens
    (forward + backward pass), converts it to GPU-hours at the peak
    throughput of the chosen GPU/precision, then adjusts for utilization
    and overhead before pricing.

    Args:

    - gpu_choice (str): The choice of GPU, e.g., 'A100 80GB PCIe', 'V100', etc.

    - precision (str): The precision level for the GPU, e.g., 'bf16', 'tf32', 'tensor'.

    - number_of_parameters (int): The number of parameters in the model.

    - number_of_tokens (int): The number of tokens to train on.

    - utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).

    - overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).

    - cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.

    Returns:

    - float: The estimated total cost of training the model.

    Raises:

    - ValueError: If the GPU is unknown, the precision is not supported for
      that GPU, or utilization_rate is outside (0, 1].

    The function dynamically adjusts the GPU throughput based on the selected GPU and precision. The throughput values are predefined for each GPU and precision combination. This estimation assumes a linear scaling of training cost with the number of parameters and tokens.

    """

    # Peak throughput in FLOP/s for each supported GPU/precision pair.
    gpu_throughputs = {

        'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},

        'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},

        'V100': {'tensor': 130e12},

        'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},

        'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}

    }

    # Validate before the dict lookup: the UI allows any precision for any
    # GPU (e.g. 'V100' + 'bf16'), which previously surfaced as an opaque
    # KeyError. Give the caller an actionable message instead.
    if gpu_choice not in gpu_throughputs:
        raise ValueError(f"Unknown GPU: {gpu_choice!r}. Choose one of: {', '.join(gpu_throughputs)}")

    if precision not in gpu_throughputs[gpu_choice]:
        supported = ', '.join(gpu_throughputs[gpu_choice])
        raise ValueError(f"Precision {precision!r} is not supported for {gpu_choice}. Supported: {supported}")

    # Guard against a ZeroDivisionError (or a negative cost) further down.
    if not 0 < utilization_rate <= 1:
        raise ValueError("utilization_rate must be in (0, 1]")

    gpu_throughput = gpu_throughputs[gpu_choice][precision]

    # 6 FLOPs per parameter per token: ~2 for the forward pass and ~4 for
    # the backward pass (common rule of thumb for transformer training).
    total_flops = 6 * number_of_parameters * number_of_tokens

    # Ideal GPU-hours at peak throughput (3600 seconds per hour).
    gpu_hours = total_flops / (gpu_throughput * 3600)

    # Real-world utilization is below peak, so wall-clock hours increase.
    adjusted_gpu_hours = gpu_hours / utilization_rate

    # Apply the overhead multiplier (checkpointing, restarts, eval, etc.).
    actual_gpu_hours = adjusted_gpu_hours * overhead

    total_cost = actual_gpu_hours * cost_per_gpu_hour

    return total_cost
|
|
|
def gradio_interface(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
    """Gradio callback: parse the UI inputs and format the estimated cost.

    Args:
    - gpu_choice (str): GPU name from the dropdown.
    - precision (str): Precision from the dropdown.
    - number_of_parameters (str): Model size in billions (free-text field).
    - number_of_tokens (str): Token count in trillions (free-text field).
    - utilization_rate (float): Slider value in (0, 1].
    - overhead (float): Slider value, overhead multiplier.
    - cost_per_gpu_hour (str): Hourly GPU price in dollars (free-text field).

    Returns:
    - str: A formatted cost message, or an error message if the inputs
      are invalid (non-numeric text, unsupported GPU/precision combo).
    """
    try:
        # Textbox inputs arrive as strings; scale to absolute counts.
        params = float(number_of_parameters) * 1e9
        tokens = float(number_of_tokens) * 1e12

        cost = estimate_training_cost(
            gpu_choice,
            precision,
            params,
            tokens,
            utilization_rate=float(utilization_rate),
            overhead=float(overhead),
            cost_per_gpu_hour=float(cost_per_gpu_hour),
        )
    except (ValueError, KeyError, ZeroDivisionError) as err:
        # Surface bad input in the UI instead of crashing the callback.
        # KeyError/ZeroDivisionError are kept for compatibility with an
        # estimator that does not pre-validate its arguments.
        return f"Error: {err}"

    return f"The estimated training cost is ${cost:,.2f}"
|
|
|
# GPUs selectable in the UI; names must match the keys of gpu_throughputs
# inside estimate_training_cost.
gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]

# Default precision per GPU, aligned by index with gpu_choices.
# NOTE(review): appears unused in this file — verify before removing.
default_precisions = ['bf16', 'tf32', 'tensor', 'bf16', 'bf16']
|
|
|
|
|
# HTML heading rendered by Gradio at the top of the interface.
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"

# HTML description rendered below the title: usage notes plus the list of
# supported GPU/precision combinations (mirrors estimate_training_cost).
description = """

<p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you calculate the cost based on model parameters, tokens, and GPU selections with various precision options. Select a GPU and the precision level to get an accurate cost estimate.</p>

<p><strong>Available GPUs and Precisions:</strong></p>

<ul>

<li><strong>A100 80GB PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>

<li><strong>A100 80GB SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>

<li><strong>V100:</strong> Uses Deep Learning performance with Tensor Cores (tensor) as the default and only precision.</li>

<li><strong>H100 SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>

<li><strong>H100 PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>

</ul>

<p>The choice of GPU and precision impacts the throughput, affecting training time and cost. BFLOAT16 is generally faster and more cost-effective, while Tensor Float 32 offers higher precision. The V100 GPU is optimized for Deep Learning with Tensor Cores.</p>

<p style='text-align: center;'>We plan to extend this calculator to include calculating the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>

"""
|
|
|
# Input widgets for the form, in the order gradio_interface expects them:
# GPU/precision dropdowns, free-text numeric fields, and sliders for the
# utilization rate and overhead multiplier.
_input_widgets = [
    gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
    gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
    gr.Textbox(label="Number of Parameters (in billions)", value="70"),
    gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
    gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
    gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.10, label="Overhead (1 + overhead percentage)"),
    gr.Textbox(label="Cost per GPU Hour ($)", value="1.85"),
]

_output_widgets = [gr.Textbox(label="Estimated Training Cost")]

# Assemble the app: gradio_interface receives the widget values positionally
# and returns the formatted cost string shown in the output textbox.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title=title,
    description=description,
    article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>",
)

# Start the local Gradio server (blocks until the app is stopped).
iface.launch()
|
|