gigant committed on
Commit
0cf0d6f
1 Parent(s): 4f1f3ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -2
app.py CHANGED
@@ -7,11 +7,87 @@ import spacy
7
  import gradio as gr
8
  import en_core_web_trf
9
  import numpy as np
 
 
 
10
 
11
  dataset = load_dataset("gigant/tib_transcripts")
12
 
13
  nlp = en_core_web_trf.load()
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def half_circle_layout(n_nodes, sentence_node=True):
16
  pos = {}
17
  for i_node in range(n_nodes - 1):
@@ -127,19 +203,23 @@ def convert_jraph_to_networkx_graph(jraph_graph: jraph.GraphsTuple) -> nx.Graph:
127
  int(senders[e]), int(receivers[e]), edge_feature=edges[e])
128
  return nx_graph
129
 
130
- def plot_graph_sentence(sentence, graph_type="both"):
131
  # sentences = dataset["train"][0]["abstract"].split(".")
132
  docs = dependency_parser([sentence])
133
  if graph_type == "dependency":
134
  graphs = construct_dependency_graph(docs)
135
  elif graph_type == "structural":
136
  graphs = construct_structural_graph(docs)
137
- elif graph_type == "both":
138
  graphs = construct_both_graph(docs)
 
 
139
  g = to_jraph(graphs[0])
140
  adj_mat = get_adjacency_matrix(g)
141
  nx_graph = convert_jraph_to_networkx_graph(g)
142
  pos = half_circle_layout(len(graphs[0]["nodes"]))
 
 
143
  plot = plt.figure(figsize=(12, 6))
144
  nx.draw(nx_graph, pos=pos,
145
  labels={i: e for i,e in enumerate(graphs[0]["nodes"])},
@@ -160,6 +240,8 @@ def get_list_sentences(id):
160
  return gr.update(choices = dataset["train"][id]["transcript"].split("."))
161
 
162
  with gr.Blocks() as demo:
 
 
163
  with gr.Tab("From transcript"):
164
  with gr.Row():
165
  with gr.Column():
 
7
  import gradio as gr
8
  import en_core_web_trf
9
  import numpy as np
10
+ import benepar
11
+ import re
12
+
13
 
14
  dataset = load_dataset("gigant/tib_transcripts")
15
 
16
  nlp = en_core_web_trf.load()
17
 
18
+ benepar.download('benepar_en3')
19
+ nlp.add_pipe('benepar', config={'model': 'benepar_en3'})
20
+
21
def parse_tree(sentence):
    """Parse a bracketed (s-expression style) constituency string into nested lists.

    Example: "(S (NP a))" -> [['S', ['NP', 'a']]].  The outermost return value
    is always a list wrapping the top-level items.

    Raises:
        ValueError: if the parentheses in `sentence` are unbalanced.
    """
    root = []
    current = root
    parents = []  # ancestors of `current`; acts as the parse stack
    for tok in re.compile(r'(?:([()])|\s+)').split(sentence):
        if not tok:
            # re.split yields empty strings around matches; skip them
            continue
        if tok == '(':
            # open a new child list and descend into it
            parents.append(current)
            child = []
            current.append(child)
            current = child
        elif tok == ')':
            if not parents:
                raise ValueError("Unbalanced parentheses")
            current = parents.pop()
        else:
            current.append(tok)
    if parents:
        raise ValueError("Unbalanced parentheses")
    return root
38
+
39
class Tree():
    """Minimal n-ary tree node used to represent a constituency parse.

    Each node carries a `name` (constituent label or token), a list of
    `children` Trees, and an integer `id` assigned in preorder by
    `set_all_ids` (None until then).
    """

    def __init__(self, name, children):
        self.children = children
        self.name = name
        self.id = None  # filled in later by set_all_ids / set_id_rec

    def set_id_rec(self, id=0):
        """Assign `id` to this node, then preorder-number the subtree.

        Returns the largest id used in this subtree, so a parent can
        continue numbering its next child from there.
        """
        self.id = id
        highest = id
        for child in self.children:
            highest = child.set_id_rec(id=highest + 1)
        return highest

    def set_all_ids(self):
        """Number the whole tree in preorder, starting at 0 for this root."""
        self.set_id_rec(0)

    def print_tree(self, level=0):
        """Render the subtree, one node per line, indented with dashes."""
        lines = [f'|{"-" * level} {self.name} ({self.id})']
        lines.extend(child.print_tree(level + 1) for child in self.children)
        return "\n".join(lines)

    def __str__(self):
        return self.print_tree(0)

    def get_list_nodes(self):
        """Return all node names in preorder (matches the ids' order)."""
        nodes = [self.name]
        for child in self.children:
            nodes.extend(child.get_list_nodes())
        return nodes
61
+
62
def rec_const_parsing(list_nodes):
    """Recursively convert a nested-list parse (from `parse_tree`) into a Tree.

    A list is treated as [label, child, child, ...]; a bare string becomes
    a leaf with no children.  Node ids are left unset (assign them with
    Tree.set_all_ids on the result).
    """
    if isinstance(list_nodes, list):
        # first element is the constituent label, the rest are subtrees
        name, children = list_nodes[0], list_nodes[1:]
    else:
        name, children = list_nodes, []
    # fix: the original used `enumerate` here but never used the index
    return Tree(name, [rec_const_parsing(child) for child in children])
68
+
69
def tree_to_graph(t):
    """Collect the directed parent->child edges of an id-annotated tree.

    Edges are listed depth-first: each child edge is immediately followed
    by that child's own subtree edges.  Returns two parallel lists
    (senders, receivers) of node ids, both empty for a leaf.
    """
    edges = []
    for child in t.children:
        edges.append((t.id, child.id))
        sub_senders, sub_receivers = tree_to_graph(child)
        edges.extend(zip(sub_senders, sub_receivers))
    senders = [s for s, _ in edges]
    receivers = [r for _, r in edges]
    return senders, receivers
79
+
80
def construct_constituency_graph(docs):
    """Build a graph dict for the constituency parse of the first sentence.

    Only the first sentence of the first doc is used.  Assumes the benepar
    pipe is loaded so `sent._.parse_string` is available — TODO confirm at
    call sites that `docs` comes from the benepar-enabled pipeline.

    Returns a one-element list containing a dict with keys "nodes",
    "senders", "receivers", and "edge_labels" (always empty here), matching
    the format of the other construct_*_graph helpers.
    """
    doc = docs[0]
    sent = list(doc.sents)[0]
    # fix: removed leftover debug print of the raw parse string
    t = rec_const_parsing(parse_tree(sent._.parse_string)[0])
    t.set_all_ids()
    senders, receivers = tree_to_graph(t)
    nodes = t.get_list_nodes()
    graphs = [{"nodes": nodes, "senders": senders, "receivers": receivers, "edge_labels": {}}]
    return graphs
90
+
91
  def half_circle_layout(n_nodes, sentence_node=True):
92
  pos = {}
93
  for i_node in range(n_nodes - 1):
 
203
  int(senders[e]), int(receivers[e]), edge_feature=edges[e])
204
  return nx_graph
205
 
206
+ def plot_graph_sentence(sentence, graph_type="constituency"):
207
  # sentences = dataset["train"][0]["abstract"].split(".")
208
  docs = dependency_parser([sentence])
209
  if graph_type == "dependency":
210
  graphs = construct_dependency_graph(docs)
211
  elif graph_type == "structural":
212
  graphs = construct_structural_graph(docs)
213
+ elif graph_type == "structural+dependency":
214
  graphs = construct_both_graph(docs)
215
+ elif graph_type == "constituency":
216
+ graphs = construct_constituency_graph(docs)
217
  g = to_jraph(graphs[0])
218
  adj_mat = get_adjacency_matrix(g)
219
  nx_graph = convert_jraph_to_networkx_graph(g)
220
  pos = half_circle_layout(len(graphs[0]["nodes"]))
221
+ if graph_type == "constituency":
222
+ pos = nx.planar_layout(nx_graph)
223
  plot = plt.figure(figsize=(12, 6))
224
  nx.draw(nx_graph, pos=pos,
225
  labels={i: e for i,e in enumerate(graphs[0]["nodes"])},
 
240
  return gr.update(choices = dataset["train"][id]["transcript"].split("."))
241
 
242
  with gr.Blocks() as demo:
243
+ with gr.Row():
244
+ graph_type = gr.Dropdown(label="Graph type", choices=["structural", "dependency", "structural+dependency", "constituency"], value="structural+dependency")
245
  with gr.Tab("From transcript"):
246
  with gr.Row():
247
  with gr.Column():