File size: 2,404 Bytes
e10040f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18d32fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Heading text for the demo, written as a Markdown level-1 heading
# (the value starts with "# "). The emoji are part of the string value.
# NOTE(review): presumably rendered at the top of the demo UI - confirm
# against the code that reads `title`.
title =  """# 🙋🏻‍♂️Welcome to Tonic's 🤖 Nemotron-Mini-4B Demo 🚀"""

# Markdown body describing Nemotron-Mini-4B-Instruct. In order, it contains:
# an overview paragraph, a link to build.nvidia.com, the model developer,
# the training dates, a "License" section and a "Model Architecture" section.
#
# The text begins immediately after the opening quotes and ends with a blank
# line before the closing quotes, so the value ends in two newlines.
# Several lines end with a trailing space that is part of the string value;
# an editor that strips trailing whitespace will change the value.
# NOTE(review): presumably rendered as Markdown next to `title` - confirm
# against the code that reads `description`.
description = """Nemotron-Mini-4B-Instruct is a model for generating responses for roleplaying, retrieval augmented generation, and function calling.  It is a small language model (SLM) optimized through distillation, pruning and quantization for speed and on-device deployment. It is a fine-tuned version of [nvidia/Minitron-4B-Base](https://huggingface.co/nvidia/Minitron-4B-Base), which was pruned and distilled from [Nemotron-4 15B](https://arxiv.org/abs/2402.16819) using [our LLM compression technique](https://arxiv.org/abs/2407.14679). This instruct model is optimized for roleplay, RAG QA, and function calling in English. It supports a context length of 4,096 tokens. This model is ready for commercial use.

Try this model on [build.nvidia.com](https://build.nvidia.com/nvidia/nemotron-mini-4b-instruct).

**Model Developer:** NVIDIA 

**Model Dates:** Nemotron-Mini-4B-Instruct was trained between February 2024 and Aug 2024.

## License

[NVIDIA Community Model License](https://huggingface.co/nvidia/Nemotron-Mini-4B-Instruct/blob/main/nvidia-community-model-license-aug2024.pdf)

## Model Architecture

Nemotron-Mini-4B-Instruct uses a model embedding size of 3072, 32 attention heads, and an MLP intermediate dimension of 9216. It also uses Grouped-Query Attention (GQA) and Rotary Position Embeddings (RoPE). 

**Architecture Type:** Transformer Decoder (auto-regressive language model) 

**Network Architecture:** Nemotron-4 

"""

# JSON text (stored as a string, not a parsed object) for a tool definition
# named "custom_tool". Its "parameters" entry is an object schema with two
# string properties, "param1" and "param2"; only "param1" is listed under
# "required".
# The braces are single, so the value is plain JSON exactly as written.
# NOTE(review): presumably a default/template tool definition for the
# function-calling demo - confirm against the code that reads `customtool`.
customtool = """{
  "name": "custom_tool",
  "description": "A custom tool defined by the user",
  "parameters": {
    "type": "object",
    "properties": {
      "param1": {
        "type": "string",
        "description": "First parameter of the custom tool"
      },
      "param2": {
        "type": "string",
        "description": "Second parameter of the custom tool"
      }
    },
    "required": ["param1"]
  }
}"""

# Tool definition text for "get_current_weather": an object schema with a
# string "location" property (the only required one) and a string "unit"
# property restricted to "celsius" or "fahrenheit".
#
# Every brace in this literal is doubled ("{{" / "}}"), so the value as
# written is NOT plain JSON.
# NOTE(review): the doubling matches str.format()-style brace escaping, so
# this string presumably goes through a format step that collapses the
# braces before it is shown or parsed - confirm against the consumer. If it
# is used verbatim, the doubled braces are a bug.
example = """{{
  "name": "get_current_weather",
  "description": "Get the current weather in a given location",
  "parameters": {{
    "type": "object",
    "properties": {{
      "location": {{
        "type": "string",
        "description": "The city and state, e.g. San Francisco, CA"
      }},
      "unit": {{
        "type": "string",
        "enum": ["celsius", "fahrenheit"]
      }}
    }},
    "required": ["location"]
  }}
}}"""