File size: 5,225 Bytes
e042085
 
 
 
 
66854bf
 
e042085
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66854bf
190f21f
 
66854bf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
## Global Variables

API_BASE = "https://api.01.ai/v1"
API_KEY = "your key"

model_name = 'nvidia/NV-Embed-v1'

title = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"""

description = """
You can use this Space to test out the current model [nvidia/NV-Embed-v1](https://huggingface.co/nvidia/NV-Embed-v1). 🐣a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark (MTEB benchmark)(as of May 24, 2024), with 56 tasks, encompassing retrieval, reranking, classification, clustering, and semantic textual similarity tasks.
You can also use 📽️Nvidia 🛌🏻Embed V-1 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/NV-Embed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3> 
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [MultiTonic](https://github.com/MultiTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

tasks = {
        'ClimateFEVER': 'Given a claim about climate change, retrieve documents that support or refute the claim',
        'DBPedia': 'Given a query, retrieve relevant entity descriptions from DBPedia',
        'FEVER': 'Given a claim, retrieve documents that support or refute the claim',
        'FiQA2018': 'Given a financial question, retrieve user replies that best answer the question',
        'HotpotQA': 'Given a multi-hop question, retrieve documents that can help answer the question',
        'MSMARCO': 'Given a web search query, retrieve relevant passages that answer the query',
        'NFCorpus': 'Given a question, retrieve relevant documents that best answer the question',
        'NQ': 'Given a question, retrieve Wikipedia passages that answer the question',
        'QuoraRetrieval': 'Given a question, retrieve questions that are semantically equivalent to the given question',
        'SCIDOCS': 'Given a scientific paper title, retrieve paper abstracts that are cited by the given paper',
}

intention_prompt= """
  "type": "object",
  "properties": {
    "ClimateFEVER": {
      "type": "boolean",
      "description" : "select this for climate science related text"
    },
    "DBPedia": {
      "type": "boolean",
      "description" : "select this for encyclopedic related knowledge"
    },
    "FEVER": {
      "type": "boolean",
      "description": "select this to verify a claim or embed a claim"
    },
    "FiQA2018": {
      "type": "boolean",
      "description" : "select this for financial questions or topics"
    },
    "HotpotQA": {
      "type": "boolean",
      "description" : "select this for a multi-hop question or for texts that provide multihop claims"
    },
    "MSMARCO": {
      "type": "boolean",
      "description": "Given a web search query, retrieve relevant passages that answer the query"
    },
    "NFCorpus": {
      "type": "boolean",
      "description" : "Given a question, retrieve relevant documents that best answer the question"
    },
    "NQ": {
      "type": "boolean",
      "description" : "Given a question, retrieve Wikipedia passages that answer the question"
    },
    "QuoraRetrieval": {
      "type": "boolean",
      "description": "Given a question, retrieve questions that are semantically equivalent to the given question"
    },
    "SCIDOCS": {
      "type": "boolean",
      "description": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper"
    }
  },
  "required": [
    "ClimateFEVER",
    "DBPedia",
    "FEVER",
    "FiQA2018",
    "HotpotQA",
    "MSMARCO",
    "NFCorpus",
    "NQ",
    "QuoraRetrieval",
    "SCIDOCS",
  ]
produce a complete json schema."

you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""

metadata_prompt = "you will recieve a text or a question, produce metadata operator pairs for the text . ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION , ONLY PRODUCE ONE METADATA STRING PER OPERATOR:"

system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""