File size: 1,615 Bytes
49664ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import gradio as gr
import Wikidata_Text_Parser as wtr
import sqlite3

def process_input(qid, progress=gr.Progress()):
    """Run the reference-parsing pipeline for one Wikidata item and store the result.

    The steps of ``Wikidata_Text_Parser`` are called in order (claim parsing,
    property filtering, URL parsing, HTML parsing, claim/HTML text
    generation). The two final tables are written to the SQLite file
    ``wikidata_claims_refs_parsed.db``, replacing any previous
    ``claim_text`` / ``html_text`` tables.

    Args:
        qid: Wikidata item identifier entered by the user (e.g. ``"Q42"``).
            Surrounding whitespace is ignored.
        progress: Gradio progress tracker. It is declared as a parameter
            default so that Gradio can inject a tracker bound to the running
            event; callers normally do not pass it.

    Returns:
        A one-line summary with the number of rows in the HTML text table.

    Raises:
        gr.Error: If ``qid`` is empty or only whitespace.
    """
    qid = (qid or "").strip()
    if not qid:
        raise gr.Error("Please enter a QID (e.g. Q42).")

    progress(0, desc="Parsing claims...")
    # Return value is not used here; presumably the parser persists its own
    # output for the later steps -- TODO confirm against Wikidata_Text_Parser.
    wtr.claimParser(qid)

    progress(0.20, desc="Filtering properties...")
    # Called for its effects only; the returned frame is not needed here.
    wtr.propertyFiltering(qid)

    progress(0.40, desc="Parsing URLs...")
    url_set = wtr.urlParser()

    progress(0.60, desc="Parsing HTML...")
    html_set = wtr.htmlParser(url_set)

    progress(0.80, desc="Generating claim text...")
    claim_text = wtr.claim2text(html_set)

    progress(1, desc="Converting HTML to text...")
    html_text = wtr.html2text(html_set)

    # Everything is stored as text; convert before opening the connection so
    # a conversion failure cannot leave a connection open.
    # NOTE(review): both results are assumed to be pandas DataFrames.
    claim_text = claim_text.astype(str)
    html_text = html_text.astype(str)

    conn = sqlite3.connect('wikidata_claims_refs_parsed.db')
    try:
        claim_text.to_sql('claim_text', conn, if_exists='replace', index=False)
        html_text.to_sql('html_text', conn, if_exists='replace', index=False)
        conn.commit()
    finally:
        # Always release the database handle, even when a write fails.
        conn.close()

    return f"{html_text.shape[0]} HTML documents collected via references of {qid}"

# Single-page UI: an introduction, a QID input box, a result box and a button
# that starts the parsing run.
with gr.Blocks() as demo:
    gr.Markdown(
        value="""
        # Reference Quality Verification Tool
        This is a tool for verifying the reference quality of Wikidata claims related to the target entity item.

        Parsing could take 3~5 mins depending on the number of references.
        """
    )

    inp = gr.Textbox(placeholder="Input QID (i.e. Q42)", label="Input QID")
    out = gr.Textbox(label="Parsing result")
    run_button = gr.Button(value="Start parsing")
    # On click, the text of `inp` is passed to process_input and the returned
    # string is shown in `out`.
    run_button.click(fn=process_input, inputs=inp, outputs=out)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()