"""Gradio demo for PIISA: detect PII in a text and transform it per a policy.

The page offers a text box, a language picker, one button per transformation
policy, and a read-only box showing the transformed text. Example texts are
read from ``examples.txt`` (one example per line).
"""
import os

import gradio as gr
from pii_transform.api.e2e import PiiTextProcessor
from pii_extract.defs import FMT_CONFIG_PLUGIN

# Number of characters of each example shown in the examples dropdown.
EXAMPLE_PREVIEW_CHARS = 50


def _load_examples(path):
    """Return the non-blank lines of *path*, without trailing newlines."""
    # Explicit UTF-8: the examples cover several languages, and the platform
    # default encoding is not guaranteed to decode them.
    with open(path, "r", encoding="utf-8") as f:
        return [line.rstrip("\r\n") for line in f if line.strip()]


def _preview(example, width=EXAMPLE_PREVIEW_CHARS):
    """Return *example* cut to *width* chars, with "..." only if it was cut."""
    return example[:width] + "..." if len(example) > width else example


examples = _load_examples("examples.txt")
# Same order as `examples`: the dropdown hands back an index into this list.
examples_truncated = [_preview(example) for example in examples]

# Display name -> language code handed to PiiTextProcessor.
language_choices = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Portuguese": "pt",
    "German": "de",
    "French": "fr",
}
# Process-wide fallback language; see `process` for why it is only a fallback.
language_code = "en"

cache_dir = "/home/user/app/cache"
# os.makedirs raises on failure, so the failure has to be caught for the
# warning to ever be emitted; the app keeps starting in that case.
try:
    os.makedirs(cache_dir, exist_ok=True)
except OSError:
    gr.Warning("Cache directory creation error")
else:
    gr.Info("Cache directory created at " + cache_dir)

# NOTE(review): the bracketed tokens <TYPE:VALUE> and <PII> were missing from
# this text (only a stray backslash was left); they were restored from the
# pii-transform policies guide linked below -- please confirm the wording.
# The doubled backslashes produce Markdown escapes so the angle brackets are
# shown literally instead of being parsed as HTML.
policy_help_string = """
Policies are defined as follows:
1. **Annotate** - replace the PII instance by a \\<TYPE:VALUE\\> string, i.e. include both the PII type and its value
2. **Redact** - all PII instances are replaced by a \\<PII\\> generic string
3. **Placeholder** - replace with a prototypical value
4. **Synthetic** - substitute with synthetic data

For more information on the transformation policies, please refer to the guide [here](https://github.com/piisa/pii-transform/blob/main/doc/policies.md#pii-transformation-policies)"""

# Markdown needs the heading and each list item on its own line.
header_string = """
## [PIISA](https://privacyprotection.substack.com/p/towards-a-common-privacy-api-introducing)
**PIISA** (Personally Identifiable Information Standard Architecture) is a set of tools to detect and remediate
PII within large scale language data. It uses best of breed tools like [🤗 transformers](https://huggingface.co/docs/transformers/index) libraries, [spaCy](https://spacy.io/), regular expressions, [Faker](https://faker.readthedocs.io/en/master/) and [Presidio](https://microsoft.github.io/presidio/)
to leverage best practices for effectively managing data privacy in accordance with your privacy policies.

Important links:
1. [PIISA API docs](https://github.com/piisa/piisa)
2. [Blog](https://privacyprotection.substack.com/)
3. [LinkedIn](https://www.linkedin.com/company/piisa/)

This demo uses the multi-lingual [wikineural model](https://huggingface.co/Babelscape/wikineural-multilingual-ner) from [Babelscape](https://huggingface.co/Babelscape).

### ▵ We're looking for any feedback and/or suggestions, so please open a new thread in the Discussions tab ▵
"""


def change_language(language_selection):
    """Record the picked language as the module-wide default and notify.

    Args:
        language_selection: a key of ``language_choices`` (e.g. "English").

    Raises:
        KeyError: if *language_selection* is not a key of ``language_choices``.
    """
    global language_code
    language_code = language_choices[language_selection]
    gr.Info(f"{language_selection} selected")


def process(text, policy, language_selection=None):
    """Transform the PII found in *text* according to *policy*.

    Args:
        text: the text to analyze.
        policy: policy name; it is lower-cased before use, so a button label
            such as "Annotate" is accepted.
        language_selection: optional key of ``language_choices``. When it is
            missing or unknown, the module-level ``language_code`` is used.

    Returns:
        The transformed text, or "" when *text* is empty or only whitespace.
    """
    policy = policy.lower()
    if not text or not text.strip():
        print("Empty text field")
        gr.Warning("No text present")
        return ""

    # The module-level language is shared by every visitor of the app, so the
    # click handlers pass the caller's own dropdown value instead; the global
    # is consulted only when no (known) selection is supplied.
    lang = language_choices.get(language_selection, language_code)

    # Create the object, defining the language to use and the policy.
    # Custom config to prevent loading of the Presidio plugin
    proc = PiiTextProcessor(
        lang=lang, default_policy=policy, config="config.json"
    )

    # Process a text buffer and get the transformed buffer
    return proc(text)


def get_full_example(idx):
    """Return the full example text at position *idx* ("" if *idx* is None)."""
    if idx is None:
        return ""
    return examples[idx]


with gr.Blocks() as demo:
    # Header: description on the left, logo on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown(value=header_string)
        with gr.Column(scale=0, min_width=100):
            pass
        with gr.Column(scale=0, min_width=100):
            logo = gr.Image(
                "image.jpeg",
                height=100,
                width=100,
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                mask_opacity=1.0,
            )

    # Main row: input text | controls | transformed text, with spacer columns.
    with gr.Row():
        with gr.Column(scale=2, min_width=400):
            text_original = gr.Textbox(
                label="Original Text",
                lines=13,
                placeholder="Enter the text you would like to analyze, or select from one of the examples below",
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(scale=0, min_width=150):
            gr.Markdown(value="\n\nSelect Language\n\n")
            lang_picker = gr.Dropdown(
                choices=list(language_choices.keys()),
                label="",
                value=list(language_choices.keys())[0],
                type="value",
                container=False,
            )
            lang_picker.select(change_language, inputs=lang_picker, outputs=None)
            gr.Markdown(value="\n\nSelect Policy\n\n")
            annotate_btn = gr.Button(value="Annotate", variant="primary", size="sm")
            redact_btn = gr.Button(value="Redact", variant="primary", size="sm")
            anonymize_btn = gr.Button(value="Synthetic", variant="primary", size="sm")
            placeholder_btn = gr.Button(
                value="Placeholder", variant="primary", size="sm"
            )
        with gr.Column(scale=0, min_width=25):
            pass
        with gr.Column(
            scale=2,
            min_width=400,
        ):
            text_modified = gr.TextArea(
                label="Transformed Text",
                lines=13,
                show_copy_button=True,
                interactive=False,
            )

    # Each button passes its own label as the policy (process lower-cases it),
    # together with the language currently shown in the dropdown.
    for policy_btn in (annotate_btn, redact_btn, anonymize_btn, placeholder_btn):
        policy_btn.click(
            fn=process,
            inputs=[text_original, policy_btn, lang_picker],
            outputs=text_modified,
        )

    with gr.Row():
        example_selector = gr.Dropdown(
            examples_truncated, type="index", label="Examples"
        )
        example_selector.select(
            get_full_example, inputs=example_selector, outputs=[text_original]
        )

    with gr.Accordion(label="Help Panel", open=False):
        gr.Markdown(value=policy_help_string)

demo.queue().launch()