PereLluis13 committed
Commit da80bd2
1 Parent(s): e02be2a

Update app.py
Files changed (1): app.py (+10 -3)
app.py CHANGED
@@ -7,7 +7,10 @@ import torch
 
 def load_tok_and_data(lan):
     st_time = time()
-    tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang=_Tokens[lan], tgt_lang="tp_XX")
+    tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
+    tokenizer._src_lang = _Tokens[lan]
+    tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens[lan])
+    tokenizer.set_src_lang_special_tokens(_Tokens[lan])
     dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
     dataset = [example for example in dataset.take(1001)]
     return (tokenizer, dataset)
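A minimal usage sketch, not part of the commit, of the patched tokenizer set-up above: it repeats the four lines from load_tok_and_data for a single language and prints which special tokens end up in the encoding. The _Tokens entry ("en" -> "en_XX") and the sample sentence are illustrative assumptions, not taken from app.py.

from transformers import AutoTokenizer

_Tokens = {"en": "en_XX"}  # hypothetical stand-in for the Space's _Tokens mapping

# Same workaround as in load_tok_and_data: load with the fixed target language,
# then force the source-language special tokens by hand instead of passing src_lang.
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
tokenizer._src_lang = _Tokens["en"]
tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens["en"])
tokenizer.set_src_lang_special_tokens(_Tokens["en"])

# The encoded input should now carry the requested source-language token.
inputs = tokenizer("The Eiffel Tower is located in Paris.", return_tensors="pt")
print(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]))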
 
@@ -59,7 +62,7 @@ def extract_triplets_typed(text):
         triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
     return triplets
 
-st.markdown("""This is a demo for the Findings of EMNLP 2021 paper [REBEL: Relation Extraction By End-to-end Language generation](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf). The pre-trained model is able to extract triplets for up to 200 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/rebel-large). Read more about it in the [paper](https://aclanthology.org/2021.findings-emnlp.204) and in the original [repository](https://github.com/Babelscape/rebel).""")
+st.markdown("""This is a demo for the ACL 2023 paper [RED<sup>FM</sup>: a Filtered and Multilingual Relation Extraction Dataset](https://arxiv.org/abs/2306.09802). The pre-trained model is able to extract triplets for up to 400 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/mrebel-large). Read more about it in the [paper](https://arxiv.org/abs/2306.09802) and in the original [repository](https://github.com/Babelscape/rebel#REDFM).""")
 
 model = load_model()
 
 
@@ -105,7 +108,11 @@ st.write(text)
 
 if not agree:
     st.title('Silver output')
-    st.write(dataset[dataset_example]['relations'])
+    entities = dataset[dataset_example]['entities']
+    relations =[]
+    for trip in dataset[dataset_example]['relations']:
+        relations.append({'subject': entities[trip['subject']], 'predicate': trip['predicate'], 'object': entities[trip['object']]})
+    st.write(relations)
 
 st.title('Prediction text')
 decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
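A standalone sketch of the silver-output mapping added in the last hunk, run on a made-up example in the assumed SREDFM layout: 'relations' stores integer indices into the 'entities' list, and the loop resolves them into full entity records before display. The entity fields ('surfaceform', 'type') and the plain-string predicate are illustrative guesses, not taken from the dataset card.

# Made-up example in the assumed SREDFM schema (field names are hypothetical).
example = {
    "entities": [
        {"surfaceform": "Barack Obama", "type": "per"},
        {"surfaceform": "Hawaii", "type": "loc"},
    ],
    "relations": [
        {"subject": 0, "predicate": "place of birth", "object": 1},
    ],
}

# Same resolution step as in the commit: replace entity indices with entity records.
entities = example["entities"]
relations = []
for trip in example["relations"]:
    relations.append({
        "subject": entities[trip["subject"]],
        "predicate": trip["predicate"],
        "object": entities[trip["object"]],
    })

print(relations)
# [{'subject': {'surfaceform': 'Barack Obama', 'type': 'per'},
#   'predicate': 'place of birth',
#   'object': {'surfaceform': 'Hawaii', 'type': 'loc'}}]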