Rebiber / app.py
(Bill) Yuchen Lin
hide the download button to avoid confusion before submit
722ef03
raw
history blame contribute delete
No virus
7.2 kB
import gradio as gr
import rebiber
import os
import uuid
# Load Bib Database
# Directory of the installed rebiber package: the absolute path of its
# ``__init__.py`` with the file name stripped, so the trailing path separator
# is kept.
# NOTE(review): presumably rebiber prefixes ``start_dir`` onto each listed file
# name, in which case the trailing separator matters -- confirm before
# replacing this with os.path.dirname().
filepath = os.path.abspath(rebiber.__file__).replace("__init__.py","")
# List of bibliography data files, located inside the package directory.
bib_list_path = os.path.join(filepath, "bib_list.txt")
# Relative path, so it is resolved against the current working directory.
abbr_tsv_path = "abbr.tsv"
# Both tables are built once at import time and reused by every request.
bib_db = rebiber.construct_bib_db(bib_list_path, start_dir=filepath)
abbr_dict = rebiber.normalize.load_abbr_tsv(abbr_tsv_path)
def process(input_bib, shorten, remove_keys, deduplicate, sort):
    """Normalize the submitted BibTeX text with rebiber.

    Args:
        input_bib: Raw BibTeX text taken from the input textbox.
        shorten: When true, the loaded abbreviation table is passed to
            rebiber; otherwise an empty list is passed.
        remove_keys: Field names forwarded as ``removed_value_names``.
        deduplicate: Forwarded unchanged to ``rebiber.normalize_bib``.
        sort: Forwarded unchanged to ``rebiber.normalize_bib``.

    Returns:
        A 3-tuple ``(output_bib, random_id, button_update)``. ``random_id``
        identifies the ``output_<id>.bib`` file left on disk for a later
        download, and ``button_update`` toggles the download button.
        When the input contains no ``@`` the tuple is
        ``("N/A", "", <hide button>)``.
    """
    # The handler is wired to three output components, so every path has to
    # return three values -- including the "nothing to do" path.
    if not input_bib or "@" not in input_bib:
        return "N/A", "", gr.update(visible=False)

    random_id = uuid.uuid4().hex
    input_path = f"input_{random_id}.bib"
    output_path = f"output_{random_id}.bib"

    try:
        with open(input_path, "w") as f:
            # NOTE(review): the replacement text may have lost whitespace in
            # this copy of the file -- confirm against the upstream source.
            f.write(input_bib.replace("\t", " "))
        all_bib_entries = rebiber.load_bib_file(input_path)
    finally:
        # The input file is only a hand-off to rebiber's loader; remove it so
        # scratch files do not pile up with every request.
        if os.path.exists(input_path):
            os.remove(input_path)
    print("# Input Bib Entries:", len(all_bib_entries))

    abbr_dict_pass = abbr_dict if shorten else []
    # Forward only real field names; a non-string placeholder cannot name a
    # BibTeX field.
    removed_names = [key for key in (remove_keys or []) if isinstance(key, str)]
    rebiber.normalize_bib(bib_db, all_bib_entries, output_path,
                          abbr_dict=abbr_dict_pass,
                          deduplicate=deduplicate,
                          sort=sort,
                          removed_value_names=removed_names)

    # The output file is intentionally kept: the download handler serves it
    # later, looked up by ``random_id``.
    with open(output_path) as f:
        # NOTE(review): as written this replace maps the pattern onto itself;
        # the replacement may have lost whitespace -- confirm upstream.
        output_bib = f.read().replace("\n ", "\n ")
    return output_bib, random_id, gr.update(visible=True)
# Sample BibTeX (one arXiv-style @article and one @inproceedings entry) used
# as the initial content of the input textbox.
example_input = """
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{Lin2020CommonGenAC,
title={CommonGen: A Constrained Text Generation Challenge for Generative Commonsense Reasoning},
author={Bill Yuchen Lin and Minghan Shen and Wangchunshu Zhou and Pei Zhou and Chandra Bhagavatula and Yejin Choi and Xiang Ren},
booktitle={Findings},
year={2020}
}
"""
# One-row examples table; in the visible source it is referenced only by the
# commented-out gr.Interface code.
examples = [[example_input]]
# iface = gr.Interface(fn=process,
# inputs=gr.inputs.Textbox(lines=30, label="Input BIB"),
# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
# examples=examples,
# allow_flagging="never"
# )
# Build the Gradio UI: inputs and options on the left, normalized output and
# the download widgets on the right, followed by the event wiring.
with gr.Blocks() as demo:
    # The panda emoji in the first badge URL is percent-encoded (UTF-8 bytes
    # F0 9F 90 BC) so it survives any re-encoding of this source file.
    gr.Markdown(
        '''# Rebiber: A tool for normalizing bibtex with official info.
<table>
<tr>
<td>
<a href="https://yuchenlin.xyz/">
<img src="https://img.shields.io/badge/Yuchen%20Lin-%F0%9F%90%BC-blue?style=social">
</a>
</td>
<td>
<a href="https://github.com/yuchenlin/rebiber">
<img src="https://img.shields.io/badge/Github--blue?style=social&logo=github">
</a>
</td>
<td>
<a href="https://twitter.com/billyuchenlin/status/1353850378438070272?s=20">
<img src="https://img.shields.io/badge/Tweet--blue?style=social&logo=twitter">
</a>
</td>
</tr>
</table>
<span style="font-size:13pt">
We often cite papers using their arXiv versions without noting that they are already __PUBLISHED__ in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences.
We introduce __Rebiber__, a simple tool in Python to fix them automatically. It is based on the official conference information from the [DBLP](https://dblp.org/) or [the ACL anthology](https://www.aclweb.org/anthology/) (for NLP conferences)!
Apart from handling outdated arXiv citations, __Rebiber__ also normalizes citations in a unified way (DBLP-style), supporting abbreviation and value selection.
</span>
'''
    )
    with gr.Row():
        with gr.Column(scale=3):
            input_bib = gr.Textbox(lines=15, label="Input BIB", value=example_input, interactive=True)
            # ``value`` is the list of pre-selected choices, so "nothing
            # selected" is an empty list rather than one flag per choice.
            removekeys = gr.CheckboxGroup(
                ["url", "biburl", "address", "publisher", "pages", "doi", "volume", "bibsource"],
                value=[],
                label="Remove Keys", info="Which keys to remove?")
            shorten = gr.Checkbox(label="Abbreviation", info="Shorten the conference/journal names (e.g., `Proceedings of the 2020 International Conference of ...` --> `Proc. of ICML')", value=False)
            dedup = gr.Checkbox(label="Deduplicate entries.", value=False)
            sort = gr.Checkbox(label="Sort alphabetically by ID.", value=False)
            with gr.Row():
                clr_button = gr.Button("Clear")
                button = gr.Button("Submit")
            # Hidden field carrying the id of the last generated output file
            # from the Submit handler to the download handler.
            ex_uuid = gr.Text(label="UUID", visible=False)
        with gr.Column(scale=3):
            # ``interactive`` is a constructor option; ``style`` only takes
            # presentation settings such as the copy button.
            output = gr.Textbox(
                label="Output BIB (Note that you can copy the output bib file by clicking the top-right button.)",
                interactive=False,
            ).style(show_copy_button=True)
            # Both download widgets start hidden: the button is revealed by a
            # successful Submit, the file widget by the button itself.
            download_btn = gr.Button("Generate Bib File", visible=False)
            download_content = gr.File(visible=False)

    def download_file(ex_uuid):
        """Reveal the file widget pointing at ``output_<ex_uuid>.bib``.

        Args:
            ex_uuid: Id stored in the hidden UUID field by the Submit handler.

        Returns:
            A component update that shows the file widget with the generated
            file, or keeps it hidden when the id is malformed or no such file
            exists.
        """
        # The id travels through the client, so accept only the lowercase hex
        # form produced by uuid4().hex before using it in a path.
        is_hex_id = (
            isinstance(ex_uuid, str)
            and bool(ex_uuid)
            and set(ex_uuid) <= set("0123456789abcdef")
        )
        file_path = f"output_{ex_uuid}.bib"
        if not is_hex_id or not os.path.isfile(file_path):
            return gr.update(visible=False)
        # A single update sets both the value and the visibility, so the
        # component needs to be listed only once in ``outputs``.
        return gr.update(value=file_path, visible=True)

    download_btn.click(download_file, inputs=ex_uuid, outputs=download_content)
    button.click(process, inputs=[input_bib, shorten, removekeys, dedup, sort], outputs=[output, ex_uuid, download_btn], api_name="process")

    def clean(text):
        """Return an empty string, used to clear the input textbox."""
        return ""

    clr_button.click(clean, input_bib, input_bib)
# gr.Interface(fn=process,
# outputs=gr.outputs.Textbox(label="Output BIB").style(show_copy_button=True),
# examples=examples,
# allow_flagging="never",
# scroll_to_output=True,
# show_progress=True,
# )
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, not
    # when it is imported.
    demo.launch()
"""
@article{lin2020birds,
title={Birds have four legs?! NumerSense: Probing Numerical Commonsense Knowledge of Pre-trained Language Models},
author={Lin, Bill Yuchen and Lee, Seyeon and Khanna, Rahul and Ren, Xiang},
journal={arXiv preprint arXiv:2005.00683},
year={2020}
}
@inproceedings{lin2020birds,
address = {Online},
author = {Lin, Bill Yuchen and
Lee, Seyeon and
Khanna, Rahul and
Ren, Xiang},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
doi = {10.18653/v1/2020.emnlp-main.557},
pages = {6862--6868},
publisher = {Association for Computational Linguistics},
title = {{B}irds have four legs?! {N}umer{S}ense: {P}robing {N}umerical {C}ommonsense {K}nowledge of {P}re-{T}rained {L}anguage {M}odels},
url = {https://aclanthology.org/2020.emnlp-main.557},
year = {2020}
}
"""