Update README.md
Browse files
README.md
CHANGED
@@ -22,7 +22,7 @@ model_name = "NchuNLP/Chinese-Question-Answering"
|
|
22 |
tokenizer = BertTokenizerFast.from_pretrained(model_name)
|
23 |
model = BertForQuestionAnswering.from_pretrained(model_name)
|
24 |
|
25 |
-
# a) Get predictions
|
26 |
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
|
27 |
QA_input = {
|
28 |
'question': '中興大學在哪裡?',
|
@@ -30,6 +30,46 @@ QA_input = {
|
|
30 |
}
|
31 |
res = nlp(QA_input)
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
```
|
34 |
|
35 |
## Authors
|
|
|
22 |
tokenizer = BertTokenizerFast.from_pretrained(model_name)
|
23 |
model = BertForQuestionAnswering.from_pretrained(model_name)
|
24 |
|
25 |
+
# a) Get predictions
|
26 |
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
|
27 |
QA_input = {
|
28 |
'question': '中興大學在哪裡?',
|
|
|
30 |
}
|
31 |
res = nlp(QA_input)
|
32 |
|
33 |
+
# b) Inside the Question answering pipeline
# Step-by-step version of what the pipeline call in a) does, for a single
# (question, context) pair that fits into one 512-token window.
import torch

# Reuse the inputs from a). The passage is assumed to be stored under the
# 'context' key of QA_input -- confirm against the dict defined in a).
query, text = QA_input['question'], QA_input['context']

inputs = tokenizer(query, text, return_tensors="pt", padding=True, truncation=True, max_length=512, stride=256)
with torch.no_grad():  # inference only; no autograd graph is needed
    outputs = model(**inputs)

# Work on copies so the masking below does not modify the model output in place.
start_logits = outputs.start_logits.clone()
end_logits = outputs.end_logits.clone()

sequence_ids = inputs.sequence_ids()
# Mask everything apart from the tokens of the context
mask = [i != 1 for i in sequence_ids]
# Unmask the [CLS] token
mask[0] = False
mask = torch.tensor(mask)[None]

# A large negative logit drives the softmax probability of masked positions to ~0.
start_logits[mask] = -10000
end_logits[mask] = -10000

start_probabilities = torch.nn.functional.softmax(start_logits, dim=-1)[0]
end_probabilities = torch.nn.functional.softmax(end_logits, dim=-1)[0]

# scores[i, j] is the probability of the span starting at token i and ending at
# token j; keeping only the upper triangle discards spans whose end precedes
# their start.
scores = torch.triu(start_probabilities[:, None] * end_probabilities[None, :])

# argmax() indexes the flattened matrix, so recover the (row, column) pair.
max_index = scores.argmax().item()
start_index = max_index // scores.shape[1]
end_index = max_index % scores.shape[1]

# Tokenize again with the same truncation settings to obtain, for every token,
# its (start, end) character offsets in the original strings.
inputs_with_offsets = tokenizer(query, text, return_offsets_mapping=True, truncation=True, max_length=512)
offsets = inputs_with_offsets["offset_mapping"]

start_char, _ = offsets[start_index]
_, end_char = offsets[end_index]
answer = text[start_char:end_char]

result = {
    "answer": answer,
    "start": start_char,
    "end": end_char,
    # .item() yields a plain Python float instead of a 0-dim tensor.
    "score": scores[start_index, end_index].item(),
}
print(result)
|
72 |
+
|
73 |
```
|
74 |
|
75 |
## Authors
|