File size: 2,641 Bytes
47d86bc
 
 
 
 
 
 
 
 
 
 
 
11d41f5
47d86bc
 
 
 
 
 
 
 
 
11d41f5
 
47d86bc
 
 
 
 
 
 
 
 
446d164
 
47d86bc
 
 
 
1157a56
 
 
 
47d86bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1157a56
 
 
 
47d86bc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
pip install -r requirements.txt
wget https://github.com/thewh1teagle/israwave/releases/download/v0.1.0/israwave.onnx
wget https://github.com/thewh1teagle/israwave/releases/download/v0.1.0/nakdimon.onnx
wget https://github.com/thewh1teagle/israwave/releases/download/v0.1.0/espeak-ng-data.tar.gz
tar xf espeak-ng-data.tar.gz

python3 app.py
"""

import gradio as gr
from israwave import IsraWave
from israwave.helpers import text_has_niqqud
from nakdimon_ort import Nakdimon
from israwave.segment import SegmentExtractor
import numpy as np

# Shared, module-level instances: created once when the module is loaded and
# reused by create_audio() for every request.
segment_extractor = SegmentExtractor()
# 'israwave.onnx' and 'espeak-ng-data' are the artifacts fetched/unpacked by
# the setup commands in the module docstring; the paths are relative, so the
# app is presumably expected to be started from the directory containing them.
speech_model = IsraWave('israwave.onnx', 'espeak-ng-data')
# Model used to add niqqud to input text that does not already carry it.
niqqud_model = Nakdimon('nakdimon.onnx')

def create_audio(text: str, rate, pitch, energy):
    """Synthesize speech for ``text`` and return it as a single waveform.

    Text that ``text_has_niqqud`` reports as lacking niqqud is first passed
    through ``niqqud_model``. The text is then split into segments by
    ``segment_extractor``; each segment is synthesized on its own and is
    followed by the pause that the segment itself produces.

    Args:
        text: The text to speak.
        rate: Forwarded unchanged to ``speech_model.create`` as ``rate``.
        pitch: Forwarded unchanged to ``speech_model.create`` as ``pitch``.
        energy: Forwarded unchanged to ``speech_model.create`` as ``energy``.

    Returns:
        A ``(samples, sample_rate)`` tuple: the concatenation of every
        segment's samples and pauses, and ``speech_model.sample_rate``.

    Raises:
        ValueError: If no segments are extracted from ``text``, so there is
            nothing to synthesize.
    """
    if not text_has_niqqud(text):
        text = niqqud_model.compute(text)

    waveforms = []
    for segment in segment_extractor.extract_segments(text):
        waveform = speech_model.create(segment.text, rate=rate, pitch=pitch, energy=energy)
        waveforms.append(waveform.samples)
        # The pause is built for the sample rate reported by this waveform.
        waveforms.append(segment.create_pause(waveform.sample_rate))

    if not waveforms:
        # np.concatenate([]) would fail with an opaque "need at least one
        # array to concatenate" ValueError; raise the same exception type
        # with a message that points at the actual cause.
        raise ValueError("no speech segments could be extracted from the input text")

    # NOTE(review): pauses are generated at waveform.sample_rate while the
    # returned rate is speech_model.sample_rate; presumably these are always
    # equal -- confirm against the israwave API.
    return np.concatenate(waveforms), speech_model.sample_rate

def create(text, rate, pitch, energy):
    """Click handler: synthesize ``text`` and return ``(sample_rate, samples)``.

    Delegates to ``create_audio`` and swaps the order of its
    ``(samples, sample_rate)`` result before handing it to the output
    component.
    """
    pcm, hz = create_audio(text, rate, pitch, energy)
    return hz, pcm


# Custom CSS for the input box: right-to-left direction and a larger font.
# The selector targets the TextArea through its elem_id ("rtl_textarea").
_RTL_CSS = """
    #rtl_textarea textarea {
        direction: rtl;
        font-size: 20px;
    }
    """

# The stylesheet is passed through the documented ``css`` constructor argument
# rather than assigned to ``demo.css`` after the Blocks object already exists.
with gr.Blocks(theme=gr.themes.Soft(), css=_RTL_CSS) as demo:
    # Centered title
    gr.Markdown("""
    <h1 style='text-align: center;'>IsraWave</h1>
    <p style='text-align: center;'>Text-to-Speech model for Hebrew</p>
    """)

    # Text input, rendered right-to-left via the CSS above. The default value
    # is a Hebrew sample sentence (restored from a mis-decoded, mojibake form).
    text = gr.TextArea(
        label="text",
        lines=4,
        elem_id="rtl_textarea",
        value='זה כיף להזמין דברים באינטרנט, אבל הרבה פחות כיף לחכות ולחכות עד שהם יגיעו אלינו. אז מה בעצם עובר על החבילות בדרך הארוכה עד לבית שלנו? והאם אפשר לגרום לכך שהן יגיעו מהר יותר? ',
    )

    # Synthesis controls; each is forwarded to create() and defaults to 1.0.
    rate = gr.Slider(0.1, 10, label="rate", value=1.0)
    pitch = gr.Slider(0.1, 10, label="pitch", value=1.0)
    energy = gr.Slider(0.1, 10, label="energy", value=1.0)

    button = gr.Button("Create", elem_id="create_button")
    output = gr.Audio()

    # create() returns (sample_rate, samples), which is fed to the Audio output.
    button.click(fn=create, inputs=[text, rate, pitch, energy], outputs=output)

    # Footer link to the project repository
    gr.Markdown("""
    <p style='text-align: center;'><a href='https://github.com/thewh1teagle/israwave' target='_blank'>Israwave on Github</a></p>
    """)

# Launched at module level (no __main__ guard), matching the original script
# behavior when run as ``python3 app.py``.
demo.launch()