File size: 5,280 Bytes
2c4ef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0aafd85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c4ef8c
 
 
 
 
0aafd85
2c4ef8c
 
 
 
 
 
 
 
 
 
0aafd85
 
2c4ef8c
 
 
 
 
 
 
0aafd85
 
 
 
 
2c4ef8c
0aafd85
 
 
 
 
 
 
 
 
 
 
 
2c4ef8c
 
0aafd85
 
 
0747efa
0aafd85
 
2c4ef8c
0aafd85
 
0747efa
0aafd85
 
83fdea9
20df333
0aafd85
20df333
 
2c4ef8c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import gradio as gr
import nltk
import spacy
from googletrans import Translator
from nltk.corpus import wordnet

# One-time download of the small English pipeline, then load it for POS tagging.
# NOTE(review): downloading at import time makes startup slow/network-dependent — confirm acceptable.
spacy.cli.download("en_core_web_sm")

nlp = spacy.load('en_core_web_sm')
# Shared googletrans client used by FrontRevSentChunk.
translator = Translator()

def Sentencechunker(sentence):
    """Return the growing word-prefixes of *sentence*, joined by ' | '.

    e.g. "a b c" -> "a | a b | a b c"
    """
    words = sentence.split(" ")
    prefixes = [" ".join(words[:end + 1]) for end in range(len(words))]
    return " | ".join(prefixes)

def ReverseSentenceChunker(sentence):
    """Reverse the word order of *sentence*, then build ' | '-joined prefixes of it."""
    words = sentence.split()
    words.reverse()
    return Sentencechunker(" ".join(words))

def three_words_chunk(sentence):
    """Return every consecutive three-word window of *sentence*, joined by ' | '.

    Sentences with fewer than three words produce an empty string.
    """
    words = sentence.split()
    windows = (" ".join(words[start:start + 3]) for start in range(len(words) - 2))
    return " | ".join(windows)

def keep_nouns_verbs(sentence):
    """Strip *sentence* down to its nouns, verbs and punctuation (space-joined).

    Uses the module-level spaCy pipeline `nlp` for POS tagging.
    """
    doc = nlp(sentence)
    kept = [token.text for token in doc if token.pos_ in ('NOUN', 'VERB', 'PUNCT')]
    return " ".join(kept)

def unique_word_count(text="", state=None):
    """Count word frequencies in *text*, optionally accumulating into *state*.

    Args:
        text: whitespace-separated input text.
        state: optional dict of prior counts; when supplied it is updated
            in place so repeated calls accumulate across invocations.

    Returns:
        A list of (word, count) pairs sorted by descending count.

    Fix: the original ended with ``return sorted_word_counts,`` — the trailing
    comma accidentally wrapped the result in a 1-tuple.
    """
    word_counts = {} if state is None else state
    for word in text.split():
        # dict.get with a default replaces the original if/else counting branch.
        word_counts[word] = word_counts.get(word, 0) + 1
    return sorted(word_counts.items(), key=lambda item: item[1], reverse=True)

def Wordchunker(word):
    """Return the list of growing prefixes of *word*: 'cat' -> ['c', 'ca', 'cat']."""
    return [word[:end] for end in range(1, len(word) + 1)]

def BatchWordChunk(sentence):
  """Spell out each word of *sentence* as growing prefixes, one word per line.

  Each output line starts with a newline and holds the word's prefixes
  separated (and terminated) by single spaces.
  """
  lines = []
  for word in sentence.split(" "):
    spelled = "".join(prefix + " " for prefix in Wordchunker(word))
    lines.append("\n" + spelled)
  return "".join(lines)

# Shared input widgets for the "Chunks" tab below.

# Target language for the optional translation step (googletrans language codes).
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")

# Selects which chunking strategy FrontRevSentChunk applies.
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")

def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
  """Chunk *Text* according to *Chunkmode*, optionally appending a translation.

  Chunkmode: "Chunks", "Reverse", "Three Word Chunks" or "Spelling Chunks".
  Translate: when truthy, append a googletrans translation into *langdest*.
  Returns the chunked text (plus "\n" + translation when requested).
  """
  chunkers = {
      "Chunks": Sentencechunker,
      "Reverse": ReverseSentenceChunker,
      "Three Word Chunks": three_words_chunk,
      "Spelling Chunks": BatchWordChunk,
  }
  chunker = chunkers.get(Chunkmode)
  FinalOutput = chunker(Text) if chunker else ""

  if Translate:
    translated = translator.translate(FinalOutput, dest=langdest)
    FinalOutput += "\n" + translated.text
  return FinalOutput

def SepHypandSynExpansion(text):
  """Look up WordNet synonyms and hypernyms for each token of *text*.

  Returns a pair: (words with no WordNet hits, formatted hit lines).
  NOTE(review): relies on `nltk` / `wordnet` being importable and the
  punkt + wordnet corpora being downloaded — confirm at startup.
  """
  tokens = nltk.word_tokenize(text)
  NoHits = "Words to pay special attention to: "
  FinalOutput = ""

  # Collect lemma names and hypernym synset names per token.
  for token in tokens:
      synonyms = []
      hypernyms = []
      for synset in wordnet.synsets(token):
          synonyms.extend(synset.lemma_names())
          hypernyms.extend(h.name() for h in synset.hypernyms())
      if synonyms or hypernyms:
          FinalOutput += f"\n{token}: hypernyms={hypernyms}, synonyms={synonyms}"
      else:
          NoHits += f"{token} | "
  return NoHits, FinalOutput

# Assemble the Gradio app: one tab per language-learning tool defined above.
with gr.Blocks() as lliface:
  with gr.Tab("Welcome"):
    # Landing page: free-form notes / roadmap rendered as raw HTML.
    gr.HTML("<h1> Spaces Test - Still Undercontruction </h1> <p> Knowledge is a Language </p> <> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles<br>, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization<br>  Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
  with gr.Tab("Transcribe - RASMUS Whisper"):
    gr.HTML("""<a href="https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles">https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles</a>""")
    # Embeds an external Hugging Face Space as an interface.
    # NOTE(review): gr.Interface.load is deprecated in newer Gradio — confirm version.
    gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
  with gr.Tab("Chunks"):
    # Chunking tool: mode dropdown, translate checkbox, input text, target language.
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation)")
  with gr.Tab("Unique words, Hypernyms and synonyms"):
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")
    # Two outputs: no-hit words and the formatted synonym/hypernym listing.
    gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], title="Word suggestions")
  with gr.Tab("Timing Practice"):
    # Embedded CodePen memorisation-timing exercise.
    gr.HTML("""<iframe height="1200" style="width: 100%;" scrolling="no" title="Memorisation Aid" src="https://codepen.io/kwabs22/embed/preview/GRXKQgj?default-tab=result&editable=true" frameborder="no" loading="lazy" allowtransparency="true" allowfullscreen="true">
  See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj">
  Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>)
  on <a href="https://codepen.io">CodePen</a>.
</iframe>""")

lliface.launch()