PereLluis13 committed
Commit da80bd2
1 Parent(s): e02be2a

Update app.py
Files changed (1): app.py (+10 -3)
app.py CHANGED
@@ -7,7 +7,10 @@ import torch
 
 def load_tok_and_data(lan):
     st_time = time()
-    tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang=_Tokens[lan], tgt_lang="tp_XX")
+    tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
+    tokenizer._src_lang = _Tokens[lan]
+    tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens[lan])
+    tokenizer.set_src_lang_special_tokens(_Tokens[lan])
     dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
     dataset = [example for example in dataset.take(1001)]
     return (tokenizer, dataset)
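A minimal usage sketch, not part of the commit, of the patched tokenizer set-up above: it repeats the four lines from load_tok_and_data for a single language and prints which special tokens end up in the encoding. The _Tokens entry ("en" -> "en_XX") and the sample sentence are illustrative assumptions, not taken from app.py.

from transformers import AutoTokenizer

_Tokens = {"en": "en_XX"}  # hypothetical stand-in for the Space's _Tokens mapping

# Same workaround as in load_tok_and_data: load with the fixed target language,
# then force the source-language special tokens by hand instead of passing src_lang.
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
tokenizer._src_lang = _Tokens["en"]
tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens["en"])
tokenizer.set_src_lang_special_tokens(_Tokens["en"])

# The encoded input should now carry the requested source-language token.
inputs = tokenizer("The Eiffel Tower is located in Paris.", return_tensors="pt")
print(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]))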
 
@@ -59,7 +62,7 @@ def extract_triplets_typed(text):
         triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
     return triplets
 
-st.markdown("""This is a demo for the Findings of EMNLP 2021 paper [REBEL: Relation Extraction By End-to-end Language generation](https://github.com/Babelscape/rebel/blob/main/docs/EMNLP_2021_REBEL__Camera_Ready_.pdf). The pre-trained model is able to extract triplets for up to 200 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/rebel-large). Read more about it in the [paper](https://aclanthology.org/2021.findings-emnlp.204) and in the original [repository](https://github.com/Babelscape/rebel).""")
+st.markdown("""This is a demo for the ACL 2023 paper [RED<sup>FM</sup>: a Filtered and Multilingual Relation Extraction Dataset](https://arxiv.org/abs/2306.09802). The pre-trained model is able to extract triplets for up to 400 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/mrebel-large). Read more about it in the [paper](https://arxiv.org/abs/2306.09802) and in the original [repository](https://github.com/Babelscape/rebel#REDFM).""")
 
 model = load_model()
 
 
@@ -105,7 +108,11 @@ st.write(text)
 
 if not agree:
     st.title('Silver output')
-    st.write(dataset[dataset_example]['relations'])
+    entities = dataset[dataset_example]['entities']
+    relations =[]
+    for trip in dataset[dataset_example]['relations']:
+        relations.append({'subject': entities[trip['subject']], 'predicate': trip['predicate'], 'object': entities[trip['object']]})
+    st.write(relations)
 
 st.title('Prediction text')
 decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
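A standalone sketch of the silver-output mapping added in the last hunk, run on a made-up example in the assumed SREDFM layout: 'relations' stores integer indices into the 'entities' list, and the loop resolves them into full entity records before display. The entity fields ('surfaceform', 'type') and the plain-string predicate are illustrative guesses, not taken from the dataset card.

# Made-up example in the assumed SREDFM schema (field names are hypothetical).
example = {
    "entities": [
        {"surfaceform": "Barack Obama", "type": "per"},
        {"surfaceform": "Hawaii", "type": "loc"},
    ],
    "relations": [
        {"subject": 0, "predicate": "place of birth", "object": 1},
    ],
}

# Same resolution step as in the commit: replace entity indices with entity records.
entities = example["entities"]
relations = []
for trip in example["relations"]:
    relations.append({
        "subject": entities[trip["subject"]],
        "predicate": trip["predicate"],
        "object": entities[trip["object"]],
    })

print(relations)
# [{'subject': {'surfaceform': 'Barack Obama', 'type': 'per'},
#   'predicate': 'place of birth',
#   'object': {'surfaceform': 'Hawaii', 'type': 'loc'}}]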