Stefan committed on
Commit
5b6e243
1 Parent(s): e250f84

fix(spaces): remove types

Browse files
Files changed (2) hide show
  1. embedding.py +1 -1
  2. processing.py +2 -2
embedding.py CHANGED
@@ -8,7 +8,7 @@ model = AutoModel.from_pretrained("intfloat/e5-large-v2")
8
  EMBEDDING_CHAR_LIMIT = 512
9
 
10
 
11
- def average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
12
  last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
13
  return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
14
 
 
8
  EMBEDDING_CHAR_LIMIT = 512
9
 
10
 
11
+ def average_pool(last_hidden_states: Tensor, attention_mask: Tensor):
12
  last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
13
  return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
14
 
processing.py CHANGED
@@ -30,7 +30,7 @@ def unmark(text):
30
  return __md.convert(text)
31
 
32
 
33
- def clean_md(text: str) -> list[str]:
34
  cleantext = re.sub(HTMLR, "", text)
35
  cleantext = re.sub(LIGHTGALLERY, "", cleantext)
36
  para = cleantext.split("\n#")
@@ -42,7 +42,7 @@ def clean_md(text: str) -> list[str]:
42
  start_seq_length = num_tokens_from_str("passage: ")
43
 
44
 
45
- def truncate_to_sequences(text: str, max_char=EMBEDDING_CHAR_LIMIT) -> list[str]:
46
  sequence_length = num_tokens_from_str(text) // (max_char - start_seq_length) + 1
47
  length = len(text)
48
  separator = length // sequence_length
 
30
  return __md.convert(text)
31
 
32
 
33
+ def clean_md(text: str):
34
  cleantext = re.sub(HTMLR, "", text)
35
  cleantext = re.sub(LIGHTGALLERY, "", cleantext)
36
  para = cleantext.split("\n#")
 
42
  start_seq_length = num_tokens_from_str("passage: ")
43
 
44
 
45
+ def truncate_to_sequences(text: str, max_char=EMBEDDING_CHAR_LIMIT):
46
  sequence_length = num_tokens_from_str(text) // (max_char - start_seq_length) + 1
47
  length = len(text)
48
  separator = length // sequence_length