File size: 4,045 Bytes
e042085
 
 
 
 
a6d437d
66854bf
e042085
 
 
 
 
 
 
 
 
 
a6d437d
 
 
 
 
 
e042085
 
a6d437d
 
e042085
 
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
a6d437d
e042085
 
 
a6d437d
 
 
 
 
e042085
a6d437d
e042085
 
66854bf
190f21f
 
66854bf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
## Global Variables

# Base URL of the remote API (host api.01.ai, "/v1" path prefix).
API_BASE = "https://api.01.ai/v1"
# Credential paired with API_BASE. The literal "your key" looks like a
# placeholder rather than a real secret.
# NOTE(review): presumably replaced or overridden before deployment — confirm,
# and avoid committing a real key to this file.
API_KEY = "your key"

# Identifier of the embedding model. Looks like a Hugging Face repo id
# ("<org>/<model>") — TODO confirm how the consumer loads it.
model_name = "jinaai/jina-embeddings-v3"

# Page heading: a leading newline followed by a Markdown level-1 heading ("# ...").
# NOTE(review): the heading names "Nvidia Embed V-1" while `model_name` in this
# file is "jinaai/jina-embeddings-v3" — confirm which model the heading should name.
title = "\n# 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"

# Long-form introduction text: Markdown with inline HTML (model link, a
# "Duplicate Space" badge, and community links).
# The badge line previously ended with a stray "</h3>" that had no matching
# opening tag anywhere in this string; it has been removed so the markup is balanced.
# NOTE(review): the copy advertises nvidia/NV-Embed-v1 while `model_name` in this
# file is "jinaai/jina-embeddings-v3" — confirm which model the text should name.
description = """
You can use this Space to test out the current model [nvidia/NV-Embed-v1](https://huggingface.co/nvidia/NV-Embed-v1). 🐣a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark (MTEB benchmark)(as of May 24, 2024), with 56 tasks, encompassing retrieval, reranking, classification, clustering, and semantic textual similarity tasks.
You can also use 📽️Nvidia 🛌🏻Embed V-1 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/NV-Embed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [MultiTonic](https://github.com/MultiTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Maps each embedding task name to a one-line, human-readable description.
# Insertion order is kept as written; "DEFAULT" is the general-purpose entry.
tasks = {
    "retrieval.query": "Used for query embeddings in asymmetric retrieval tasks",
    "retrieval.passage": "Used for passage embeddings in asymmetric retrieval tasks",
    "separation": "Used for embeddings in clustering and re-ranking applications",
    "classification": "Used for embeddings in classification tasks",
    "text-matching": "Used for embeddings in tasks that quantify similarity between two texts, such as STS or symmetric retrieval tasks",
    "DEFAULT": "Used for general-purpose embeddings when no specific task is specified",
}

# Prompt text for task classification: a JSON-Schema-style object declaring one
# required boolean property per task name, followed by an instruction to
# classify the incoming text against that schema and return only JSON.
# The five property names match the keys of `tasks` except "DEFAULT", which has
# no entry in this schema.
# NOTE(review): the closing instruction contains typos ("recieve",
# "ADDITION INSTRUCTION"); left as-is here because editing prompt text changes
# runtime behavior — confirm before correcting.
intention_prompt = """
{
  "type": "object",
  "properties": {
    "retrieval.query": {
      "type": "boolean",
      "description": "Select this for query embeddings in asymmetric retrieval tasks"
    },
    "retrieval.passage": {
      "type": "boolean",
      "description": "Select this for passage embeddings in asymmetric retrieval tasks"
    },
    "separation": {
      "type": "boolean",
      "description": "Select this for embeddings in clustering and re-ranking applications"
    },
    "classification": {
      "type": "boolean",
      "description": "Select this for embeddings in classification tasks"
    },
    "text-matching": {
      "type": "boolean",
      "description": "Select this for embeddings in tasks that quantify similarity between two texts, such as STS or symmetric retrieval tasks"
    }
  },
  "required": [
    "retrieval.query",
    "retrieval.passage",
    "separation",
    "classification",
    "text-matching"
  ]
}

you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""

# Instruction text asking for metadata/operator pairs for a text or question,
# returned as JSON only, with one metadata string per operator.
# Adjacent literals are joined at compile time; the resulting string is one line.
metadata_prompt = (
    "you will recieve a text or a question, "
    "produce metadata operator pairs for the text . "
    "ONLY PROVIDE THE FINAL JSON , "
    "DO NOT PRODUCE ANY ADDITION INSTRUCTION , "
    "ONLY PRODUCE ONE METADATA STRING PER OPERATOR:"
)

# System-style instruction naming the assistant "YiTonic" and asking for a
# complete answer based on the preceding context.
# The leading space is part of the original value and is kept on purpose.
system_message = (
    " You are a helpful assistant named YiTonic . "
    "answer the question provided based on the context above. "
    "Produce a complete answer:"
)