ve88ifz2's picture
Add new SentenceTransformer model.
3f6e57a verified
metadata
base_model: liddlefish/privacy_embedding_rag_10k_base_checkpoint_2
# Languages the model card advertises.
# pl added: the widget examples and the training samples shown in this card
# are Polish, so listing only en under-describes the model.
# NOTE(review): en is kept as originally declared — confirm it still applies.
language:
  - en
  - pl
library_name: sentence-transformers
license: apache-2.0
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@3
  - cosine_accuracy@5
  - cosine_accuracy@10
  - cosine_precision@1
  - cosine_precision@3
  - cosine_precision@5
  - cosine_precision@10
  - cosine_recall@1
  - cosine_recall@3
  - cosine_recall@5
  - cosine_recall@10
  - cosine_ndcg@10
  - cosine_mrr@10
  - cosine_map@100
pipeline_tag: sentence-similarity
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dataset_size:1K<n<10K
  - loss:MatryoshkaLoss
  - loss:MultipleNegativesRankingLoss
# Example inputs for the model page widget: each entry pairs one
# source_sentence with a list of candidate sentences.
# NOTE(review): presumably consumed by the sentence-similarity widget
# (pipeline_tag above) — confirm against the Hub widget documentation.
widget:
  - source_sentence: Żywot św. Stanisława
    sentences:
      - czym różni się Żywot św. Stanisława od Legendy św. Stanisława?
      - czemu gra The Saboteur wywołała wiele kontrowersji?
      - Muzykę do obrazu skomponowali Marco Frisina i Ennio Morricone.
  - source_sentence: Jaakow Jicchak Szapira
    sentences:
      - cadykiem którego miasta był Jaakow Jicchak Dan Landau?
      - gdzie zginął przedwojenny minister Antoni Olszewski?
      # Quoted so that the leading space stays part of the string.
      - ' W 1867 oddano do użytku Kolej Warszawsko-Terespolską (całą linię).'
  - source_sentence: Chłopiec z Nariokotome
    sentences:
      - ile wynosiła objętość mózgu chłopca z Nariokotome?
      - czemu gra The Saboteur wywołała wiele kontrowersji?
      - Akcja powieści rozgrywa się w XV-wiecznej Polsce.
  - source_sentence: Stany Zjednoczone Polski
    sentences:
      - kiedy miały szansę powstać Stany Zjednoczone Polski?
      - z jakiego powodu Chloé wywołała skandal w Melbourne?
      - komu przysługiwał tytuł autokratora?
  - source_sentence: Sen o zastrzyku Irmy
    sentences:
      - gdzie Freud spotkał Irmę we śnie o zastrzyku Irmy?
      - dlaczego Ōkunoshima została wymazana z map Japonii?
      - który samochód wyglądem nawiązuje do Mercedesa-Benza SLS AMG?
model-index:
  - name: privacy_embedding_rag_10k_base_checkpoint_2-klej-dyk-v0.1
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 768
          type: dim_768
        metrics:
          - type: cosine_accuracy@1
            value: 0.1875
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.4543269230769231
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.6225961538461539
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.7307692307692307
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.1875
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.15144230769230768
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.12451923076923076
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.07307692307692307
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.1875
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.4543269230769231
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.6225961538461539
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.7307692307692307
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.4453345212200682
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.35500896672771654
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.36239083059244687
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 512
          type: dim_512
        metrics:
          - type: cosine_accuracy@1
            value: 0.18269230769230768
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.44471153846153844
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.6033653846153846
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.7115384615384616
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.18269230769230768
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.14823717948717946
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.12067307692307691
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.07115384615384614
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.18269230769230768
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.44471153846153844
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.6033653846153846
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.7115384615384616
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.43488982498130374
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.347151633089133
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.3548109777991144
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 256
          type: dim_256
        metrics:
          - type: cosine_accuracy@1
            value: 0.1875
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.4230769230769231
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.5576923076923077
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.6682692307692307
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.1875
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.14102564102564102
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.11153846153846153
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.06682692307692308
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.1875
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.4230769230769231
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.5576923076923077
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.6682692307692307
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.41398239515933494
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.3336862789987789
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.3427233175204077
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 128
          type: dim_128
        metrics:
          - type: cosine_accuracy@1
            value: 0.17067307692307693
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.36778846153846156
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.5120192307692307
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.6009615384615384
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.17067307692307693
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.12259615384615384
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.10240384615384614
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.06009615384615385
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.17067307692307693
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.36778846153846156
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.5120192307692307
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.6009615384615384
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.371201964014572
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.2987818605006104
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.3066873839005868
            name: Cosine Map@100
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: dim 64
          type: dim_64
        metrics:
          - type: cosine_accuracy@1
            value: 0.15865384615384615
            name: Cosine Accuracy@1
          - type: cosine_accuracy@3
            value: 0.31009615384615385
            name: Cosine Accuracy@3
          - type: cosine_accuracy@5
            value: 0.3870192307692308
            name: Cosine Accuracy@5
          - type: cosine_accuracy@10
            value: 0.49278846153846156
            name: Cosine Accuracy@10
          - type: cosine_precision@1
            value: 0.15865384615384615
            name: Cosine Precision@1
          - type: cosine_precision@3
            value: 0.10336538461538461
            name: Cosine Precision@3
          - type: cosine_precision@5
            value: 0.07740384615384616
            name: Cosine Precision@5
          - type: cosine_precision@10
            value: 0.04927884615384615
            name: Cosine Precision@10
          - type: cosine_recall@1
            value: 0.15865384615384615
            name: Cosine Recall@1
          - type: cosine_recall@3
            value: 0.31009615384615385
            name: Cosine Recall@3
          - type: cosine_recall@5
            value: 0.3870192307692308
            name: Cosine Recall@5
          - type: cosine_recall@10
            value: 0.49278846153846156
            name: Cosine Recall@10
          - type: cosine_ndcg@10
            value: 0.3130531482964966
            name: Cosine Ndcg@10
          - type: cosine_mrr@10
            value: 0.2569225045787546
            name: Cosine Mrr@10
          - type: cosine_map@100
            value: 0.2651139739879426
            name: Cosine Map@100

privacy_embedding_rag_10k_base_checkpoint_2-klej-dyk-v0.1

This is a sentence-transformers model finetuned from liddlefish/privacy_embedding_rag_10k_base_checkpoint_2. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

# Usage example: load the model, embed three sentences, and compute their
# pairwise similarity matrix.
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
# NOTE(review): "sentence_transformers_model_id" looks like a template
# placeholder rather than a real repository id — replace it with this model's
# Hub id before running.
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'Sen o zastrzyku Irmy',
    'gdzie Freud spotkał Irmę we śnie o zastrzyku Irmy?',
    'dlaczego Ōkunoshima została wymazana z map Japonii?',
]
# One embedding row per input sentence.
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
# (every sentence is compared against every sentence, itself included)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval (dataset: dim_768)

Metric Value
cosine_accuracy@1 0.1875
cosine_accuracy@3 0.4543
cosine_accuracy@5 0.6226
cosine_accuracy@10 0.7308
cosine_precision@1 0.1875
cosine_precision@3 0.1514
cosine_precision@5 0.1245
cosine_precision@10 0.0731
cosine_recall@1 0.1875
cosine_recall@3 0.4543
cosine_recall@5 0.6226
cosine_recall@10 0.7308
cosine_ndcg@10 0.4453
cosine_mrr@10 0.355
cosine_map@100 0.3624

Information Retrieval (dataset: dim_512)

Metric Value
cosine_accuracy@1 0.1827
cosine_accuracy@3 0.4447
cosine_accuracy@5 0.6034
cosine_accuracy@10 0.7115
cosine_precision@1 0.1827
cosine_precision@3 0.1482
cosine_precision@5 0.1207
cosine_precision@10 0.0712
cosine_recall@1 0.1827
cosine_recall@3 0.4447
cosine_recall@5 0.6034
cosine_recall@10 0.7115
cosine_ndcg@10 0.4349
cosine_mrr@10 0.3472
cosine_map@100 0.3548

Information Retrieval (dataset: dim_256)

Metric Value
cosine_accuracy@1 0.1875
cosine_accuracy@3 0.4231
cosine_accuracy@5 0.5577
cosine_accuracy@10 0.6683
cosine_precision@1 0.1875
cosine_precision@3 0.141
cosine_precision@5 0.1115
cosine_precision@10 0.0668
cosine_recall@1 0.1875
cosine_recall@3 0.4231
cosine_recall@5 0.5577
cosine_recall@10 0.6683
cosine_ndcg@10 0.414
cosine_mrr@10 0.3337
cosine_map@100 0.3427

Information Retrieval (dataset: dim_128)

Metric Value
cosine_accuracy@1 0.1707
cosine_accuracy@3 0.3678
cosine_accuracy@5 0.512
cosine_accuracy@10 0.601
cosine_precision@1 0.1707
cosine_precision@3 0.1226
cosine_precision@5 0.1024
cosine_precision@10 0.0601
cosine_recall@1 0.1707
cosine_recall@3 0.3678
cosine_recall@5 0.512
cosine_recall@10 0.601
cosine_ndcg@10 0.3712
cosine_mrr@10 0.2988
cosine_map@100 0.3067

Information Retrieval (dataset: dim_64)

Metric Value
cosine_accuracy@1 0.1587
cosine_accuracy@3 0.3101
cosine_accuracy@5 0.387
cosine_accuracy@10 0.4928
cosine_precision@1 0.1587
cosine_precision@3 0.1034
cosine_precision@5 0.0774
cosine_precision@10 0.0493
cosine_recall@1 0.1587
cosine_recall@3 0.3101
cosine_recall@5 0.387
cosine_recall@10 0.4928
cosine_ndcg@10 0.3131
cosine_mrr@10 0.2569
cosine_map@100 0.2651

Training Details

Training Dataset

Unnamed Dataset

  • Size: 3,738 training samples
  • Columns: positive and anchor
  • Approximate statistics based on the first 1000 samples:
    |         | positive | anchor |
    | type    | string   | string |
    | details | min: 7 tokens, mean: 89.43 tokens, max: 507 tokens | min: 9 tokens, mean: 30.98 tokens, max: 76 tokens |
  • Samples:
    positive anchor
    Zespół Blaua (zespół Jabsa, ang. Blau syndrome, BS) – rzadka choroba genetyczna o dziedziczeniu autosomalnym dominującym, charakteryzująca się ziarniniakowym zapaleniem stawów o wczesnym początku, zapaleniem jagodówki (uveitis) i wysypką skórną, a także kamptodaktylią. jakie choroby genetyczne dziedziczą się autosomalnie dominująco?
    Gorgippia Gorgippia – starożytne miasto bosporańskie nad Morzem Czarnym, którego pozostałości znajdują się obecnie pod współczesną zabudową centralnej części miasta Anapa w Kraju Krasnodarskim w Rosji. gdzie obecnie znajduje się starożytne miasto Gorgippia?
    Ulubionym dystansem Rücker było 400 metrów i to na nim notowała największe indywidualne sukcesy : srebrny medal Mistrzostw Europy juniorów w lekkoatletyce (Saloniki 1991) 6. miejsce w Pucharze Świata w Lekkoatletyce (Hawana 1992) 5. miejsce na Mistrzostwach Europy w Lekkoatletyce (Helsinki 1994) srebro podczas Mistrzostw Świata w Lekkoatletyce (Sewilla 1999) złota medalistka mistrzostw Niemiec Duże sukcesy odnosiła także w sztafecie 4 x 400 metrów : złoto Mistrzostw Europy juniorów w lekkoatletyce (Varaždin 1989) złoty medal Mistrzostw Europy juniorów w lekkoatletyce (Saloniki 1991) brąz na Mistrzostwach Europy w Lekkoatletyce (Helsinki 1994) brązowy medal podczas Igrzysk Olimpijskich (Atlanta 1996) brąz na Halowych Mistrzostwach Świata w Lekkoatletyce (Paryż 1997) złoto Mistrzostw Świata w Lekkoatletyce (Ateny 1997) brązowy medal Mistrzostw Świata w Lekkoatletyce (Sewilla 1999) kto zaprojektował medale, które będą wręczane podczas tegorocznych mistrzostw Europy juniorów w lekkoatletyce?
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "MultipleNegativesRankingLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: epoch
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • gradient_accumulation_steps: 16
  • learning_rate: 2e-05
  • num_train_epochs: 5
  • lr_scheduler_type: cosine
  • warmup_ratio: 0.1
  • bf16: True
  • tf32: True
  • load_best_model_at_end: True
  • optim: adamw_torch_fused
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: epoch
  • prediction_loss_only: True
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 16
  • eval_accumulation_steps: None
  • learning_rate: 2e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: cosine
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: True
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: True
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: True
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch_fused
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss dim_128_cosine_map@100 dim_256_cosine_map@100 dim_512_cosine_map@100 dim_64_cosine_map@100 dim_768_cosine_map@100
0.0684 1 9.112 - - - - -
0.1368 2 9.5133 - - - - -
0.2051 3 9.0946 - - - - -
0.2735 4 8.9744 - - - - -
0.3419 5 7.9039 - - - - -
0.4103 6 8.1973 - - - - -
0.4786 7 6.8979 - - - - -
0.5470 8 7.0324 - - - - -
0.6154 9 6.6472 - - - - -
0.6838 10 6.3009 - - - - -
0.7521 11 6.8778 - - - - -
0.8205 12 5.9809 - - - - -
0.8889 13 5.3054 - - - - -
0.9573 14 5.706 0.2868 0.3280 0.3522 0.2415 0.3477
1.0256 15 5.0592 - - - - -
1.0940 16 4.7655 - - - - -
1.1624 17 4.9682 - - - - -
1.2308 18 5.1226 - - - - -
1.2991 19 4.8655 - - - - -
1.3675 20 4.2008 - - - - -
1.4359 21 5.0281 - - - - -
1.5043 22 4.3074 - - - - -
1.5726 23 4.3163 - - - - -
1.6410 24 3.9344 - - - - -
1.7094 25 4.6567 - - - - -
1.7778 26 4.5145 - - - - -
1.8462 27 4.1319 - - - - -
1.9145 28 3.8768 - - - - -
1.9829 29 3.5525 0.2986 0.3330 0.3483 0.2590 0.3534
2.0513 30 3.8693 - - - - -
2.1197 31 3.4675 - - - - -
2.1880 32 4.0598 - - - - -
2.2564 33 4.2429 - - - - -
2.3248 34 3.3686 - - - - -
2.3932 35 3.2663 - - - - -
2.4615 36 3.8585 - - - - -
2.5299 37 3.1157 - - - - -
2.5983 38 3.5254 - - - - -
2.6667 39 3.2782 - - - - -
2.7350 40 4.3151 - - - - -
2.8034 41 3.4567 - - - - -
2.8718 42 3.3976 - - - - -
2.9402 43 3.3945 0.3014 0.3343 0.3522 0.2626 0.3593
3.0085 44 3.4487 - - - - -
3.0769 45 3.0021 - - - - -
3.1453 46 3.2332 - - - - -
3.2137 47 3.3012 - - - - -
3.2821 48 3.2735 - - - - -
3.3504 49 2.5335 - - - - -
3.4188 50 3.7025 - - - - -
3.4872 51 2.8596 - - - - -
3.5556 52 3.1108 - - - - -
3.6239 53 3.2807 - - - - -
3.6923 54 3.1604 - - - - -
3.7607 55 3.7179 - - - - -
3.8291 56 3.3418 - - - - -
3.8974 57 2.9735 - - - - -
3.9658 58 3.2755 0.3066 0.3409 0.3546 0.2653 0.3626
4.0342 59 3.1444 - - - - -
4.1026 60 3.0212 - - - - -
4.1709 61 3.1298 - - - - -
4.2393 62 3.3195 - - - - -
4.3077 63 2.996 - - - - -
4.3761 64 2.4636 - - - - -
4.4444 65 3.2388 - - - - -
4.5128 66 2.747 - - - - -
4.5812 67 2.8715 - - - - -
4.6496 68 3.1402 - - - - -
4.7179 69 3.547 - - - - -
4.7863 70 3.6094 0.3067 0.3427 0.3548 0.2651 0.3624
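  • The bold row denotes the saved checkpoint. (Bold formatting is not preserved in this plain-text rendering; the final row, step 70, is the one whose cosine_map@100 values match the evaluation metrics reported above.)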

Framework Versions

  • Python: 3.12.2
  • Sentence Transformers: 3.0.0
  • Transformers: 4.41.2
  • PyTorch: 2.3.1
  • Accelerate: 0.27.2
  • Datasets: 2.19.1
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply}, 
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}