KwabsHug commited on
Commit
2c4ef8c
1 Parent(s): ba272ff

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from googletrans import Translator
2
+ import spacy
3
+ import gradio as gr
4
+
5
# Load the small English spaCy pipeline. spacy.load raises OSError when the
# model package is not installed, so only download it in that case instead of
# re-downloading on every start.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Shared googletrans client used by the translation option in the UI.
translator = Translator()
9
+
10
def Sentencechunker(sentence):
    """Return the growing word prefixes of *sentence*, joined by " | ".

    The sentence is split on any run of whitespace, so repeated, leading or
    trailing spaces do not produce empty "words" in the chunks.

    Args:
        sentence: The text to chunk.

    Returns:
        A string such as "a | a b | a b c" for the input "a b c";
        an empty string when the input contains no words.
    """
    words = sentence.split()
    prefixes = [" ".join(words[:end]) for end in range(1, len(words) + 1)]
    return " | ".join(prefixes)
16
+
17
def ReverseSentenceChunker(sentence):
    """Chunk *sentence* after reversing the order of its words.

    The words (split on whitespace) are put in reverse order, re-joined with
    single spaces, and the result is handed to Sentencechunker.

    Args:
        sentence: The text to reverse and chunk.

    Returns:
        Whatever Sentencechunker returns for the word-reversed sentence.
    """
    words_backwards = sentence.split()[::-1]
    return Sentencechunker(" ".join(words_backwards))
21
+
22
def three_words_chunk(sentence):
    """Return every run of three consecutive words, joined by " | ".

    Args:
        sentence: The text to chunk; it is split on whitespace.

    Returns:
        A string such as "a b c | b c d" for the input "a b c d". When the
        sentence has fewer than three words there is no full window, so the
        words themselves are returned as a single chunk (an empty string for
        empty input) rather than silently dropping the text.
    """
    words = sentence.split()
    if len(words) < 3:
        return " ".join(words)
    windows = (words[start:start + 3] for start in range(len(words) - 2))
    return " | ".join(" ".join(window) for window in windows)
27
+
28
def keep_nouns_verbs(sentence):
    """Filter *sentence* down to its nouns, verbs and punctuation.

    The text is run through the module-level spaCy pipeline ``nlp``; tokens
    whose ``pos_`` tag is NOUN, VERB or PUNCT are kept, in order.

    Args:
        sentence: The text to filter.

    Returns:
        The text of the kept tokens joined by single spaces.
    """
    kept_tags = ("NOUN", "VERB", "PUNCT")
    return " ".join(tok.text for tok in nlp(sentence) if tok.pos_ in kept_tags)
35
+
36
def unique_word_count(text="", state=None):
    """Count whitespace-separated words in *text*, most frequent first.

    Args:
        text: The text whose words are counted.
        state: Optional dict mapping word -> count from earlier calls. When
            given, it is updated in place so counts accumulate across calls;
            when omitted, counting starts from an empty dict.

    Returns:
        A one-element tuple holding a list of ``(word, count)`` pairs sorted
        by count, highest first; words with equal counts keep the order in
        which they were first seen.
    """
    counts = {} if state is None else state
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # NOTE(review): the original returned ``sorted_word_counts,`` (a 1-tuple);
    # the shape is kept for callers, but the trailing comma may be accidental.
    return (ranked,)
48
+
49
+ """
50
+ sentence = "Please help me create a sentence chunker"
51
+ sentencechunks = Sentencechunker(sentence)
52
+ reversed_chunks = ReverseSentenceChunker(sentence)
53
+ TWchunks = three_words_chunk(sentence)
54
+ nouns_verbs = keep_nouns_verbs(sentence)
55
+ """
56
+
57
+ # Destination-language codes offered for translation (default: German, "de")
58
+
59
# Dropdown of language codes; its value is passed to FrontRevSentChunk as the
# `langdest` argument.
langdest = gr.Dropdown(
    label="Choose Language",
    choices=["af", "de", "es", "ko", "ja", "zh-cn"],
    value="de",
)
60
+
61
+ """
62
+ def VarTrans(text, langdest):
63
+ translated = translator.translate(text, dest=langdest)
64
+ SCtranslated = translator.translate(sentencechunks, dest=langdest)
65
+ RCtranslated = translator.translate(reversed_chunks, dest=langdest)
66
+ TWCtranslated = translator.translate(TWchunks, dest=langdest)
67
+ return translated, SCtranslated, RCtranslated, TWCtranslated
68
+ """
69
+
70
# Dropdown selecting the chunking strategy; the option strings are the exact
# values FrontRevSentChunk compares its `Chunkmode` argument against.
ChunkModeDrop = gr.Dropdown(
    label="Choose Chunk Type",
    choices=["Chunks", "Reverse", "Three Word Chunks"],
)
71
+
72
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk *Text* with the selected strategy and optionally translate it.

    Args:
        Chunkmode: One of "Chunks", "Reverse" or "Three Word Chunks"; any
            other value (including None when nothing is selected) produces
            no chunks.
        Translate: When truthy, the chunked text is also translated.
        Text: The text to chunk.
        langdest: Destination language code handed to the translator.

    Returns:
        The chunked text; when translation is requested and there is
        something to translate, the translated text follows on a new line.
    """
    chunkers = {
        "Chunks": Sentencechunker,
        "Reverse": ReverseSentenceChunker,
        "Three Word Chunks": three_words_chunk,
    }
    chunker = chunkers.get(Chunkmode)
    chunked = chunker(Text) if chunker is not None else ""

    # Skip the translator call when translation is off or there is nothing
    # to translate (no chunk mode chosen, or empty input text).
    if not Translate or not chunked:
        return chunked

    translated = translator.translate(chunked, dest=langdest)
    return chunked + "\n" + translated.text
87
+
88
+ """
89
+ print(translated.text)
90
+ print(sentencechunks)
91
+ print(SCtranslated.text)
92
+ print(reversed_chunks)
93
+ print(RCtranslated.text)
94
+ print(TWchunks)
95
+ print(TWCtranslated.text)
96
+ print(nouns_verbs)
97
+ """
98
+
99
def Wordchunker(word):
    """Return every non-empty prefix of *word*, shortest first.

    Args:
        word: The sequence (normally a string) to take prefixes of.

    Returns:
        A list such as ["a", "ab", "abc"] for the input "abc"; an empty list
        for empty input.
    """
    return [word[:end] for end in range(1, len(word) + 1)]
104
+
105
# Import-time demo: print the prefixes of a sample word, preceded by two
# blank lines.
word = "please"
wordchunks = Wordchunker(word)
print("\n", wordchunks, sep="\n")
109
+
110
+ #random_chunk_display(TWCtranslated.text)
111
+
112
# Assemble the page: a notes banner followed by one Interface per tool.
with gr.Blocks() as lliface:
    # The second paragraph originally opened with "<>" but closed with
    # "</p>"; the opening tag is repaired so the markup is well-formed.
    gr.HTML(
        "<p> Still Undercontruction </p> "
        "<p> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> "
        "<p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles, "
        "https://huggingface.co/spaces/vumichien/whisper-speaker-diarization, "
        "Maybe duplicate these, private them and then load into spaces? --> "
        "Whisper space for youtube, Clip Interrogator, load here and all my random functions "
        "esp. text to HTML </p>"
    )
    # Chunking tool: chunk mode, translate checkbox, text and target language.
    gr.Interface(
        fn=FrontRevSentChunk,
        inputs=[ChunkModeDrop, "checkbox", "text", langdest],
        outputs="text",
    )
    gr.Interface(
        fn=keep_nouns_verbs,
        inputs=["text"],
        outputs="text",
        title="Noun and Verbs only (Plus punctuation",
    )
    gr.HTML("Add a codepen pen page here")
    gr.Interface(
        fn=unique_word_count,
        inputs="text",
        outputs="text",
        title="Wordcounter",
    )

lliface.launch()