Spaces:

getapi
/

mstts

Sleeping

File size: 10,920 Bytes

import {serve} from "https://deno.land/std/http/server.ts";
import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1";

/** Short model aliases accepted by the API, mapped to full Edge voice names. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
    ["ava", "en-US-AvaMultilingualNeural"],
    ["andrew", "en-US-AndrewMultilingualNeural"],
    ["emma", "en-US-EmmaMultilingualNeural"],
    ["brian", "en-US-BrianMultilingualNeural"],
    ["vivienne", "fr-FR-VivienneMultilingualNeural"],
    ["remy", "fr-FR-RemyMultilingualNeural"],
    ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
    ["florian", "de-DE-FlorianMultilingualNeural"],
    ["dmitry", "ru-RU-DmitryNeural"],
    ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Voice used when the requested model is not a known alias. */
const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

/**
 * Parses a `key:value|key:value` voice string (e.g. `rate:0.5|pitch:-0.1`).
 *
 * Only the `rate` and `pitch` keys are read. A missing, empty or non-numeric
 * value yields 0, and a non-string `voice` yields both defaults, so the
 * result never contains `NaN`. When a key appears more than once the last
 * occurrence wins.
 */
function parseVoiceParams(voice: unknown): { rate: number; pitch: number } {
    const params = {rate: 0, pitch: 0};
    if (typeof voice !== "string") {
        return params;
    }
    for (const part of voice.split("|")) {
        const [rawKey, rawValue] = part.split(":");
        const key = (rawKey ?? "").trim();
        if (key !== "rate" && key !== "pitch") {
            continue;
        }
        const value = Number(rawValue || 0);
        params[key] = Number.isFinite(value) ? value : 0;
    }
    return params;
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and returns the audio as an
 * `audio/mpeg` response.
 *
 * @param model Either a short alias (see `VOICE_ALIASES`) or a full voice
 *   name containing "Neural". Unknown aliases and non-string values fall
 *   back to `DEFAULT_VOICE_NAME`.
 * @param voice `rate:<n>|pitch:<n>` string. It is only honoured when `model`
 *   is a full "Neural" voice name; aliases always use rate 0 and pitch 0.
 * @param text The text to speak.
 * @returns A response whose body is the synthesized audio.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
    // Callers pass values straight from parsed JSON, so guard the type here.
    const requestedModel = typeof model === "string" ? model : "";

    let voiceName: string;
    let rate = 0;
    let pitch = 0;

    if (requestedModel.includes("Neural")) {
        voiceName = requestedModel;
        ({rate, pitch} = parseVoiceParams(voice));
    } else {
        voiceName = VOICE_ALIASES.get(requestedModel) ?? DEFAULT_VOICE_NAME;
    }

    const tts = new EdgeSpeechTTS();
    const payload = {
        input: text,
        options: {rate, pitch, voice: voiceName},
    };
    const response = await tts.create(payload);
    const mp3Buffer = new Uint8Array(await response.arrayBuffer());
    return new Response(mp3Buffer, {
        headers: {"Content-Type": "audio/mpeg"},
    });
}

/**
 * Checks that the request's Content-Type matches the expected media type.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive, so
 * `application/json; charset=utf-8` satisfies `application/json`.
 *
 * @param req Incoming request whose `Content-Type` header is inspected.
 * @param expected Media type the caller requires, e.g. `application/json`.
 * @returns A 400 "Bad Request" response when the header is missing or names a
 *   different media type; `undefined` when the request is acceptable.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
    const contentType = req.headers.get("Content-Type");
    // Strip parameters and normalise case before comparing.
    const mediaTypeOf = (value: string | null): string =>
        ((value ?? "").split(";")[0] ?? "").trim().toLowerCase();

    if (mediaTypeOf(contentType) !== mediaTypeOf(expected)) {
        console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
        return new Response("Bad Request", {status: 400});
    }
    return undefined;
}

/**
 * Debug endpoint: synthesizes a fixed Russian sample phrase with a fixed
 * voice so the TTS pipeline can be checked without sending a request body.
 * The chosen parameters are logged before synthesis starts.
 */
async function handleDebugRequest() {
    const sample = {
        model: "en-US-BrianMultilingualNeural",
        voice: "rate:0.0|pitch:0.0",
        text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
    };
    console.log(`model=${sample.model}, voice=${sample.voice}, text=${sample.text}`);
    return synthesizeSpeech(sample.model, sample.voice, sample.text);
}

/**
 * Handles `POST /v1/audio/speech`: validates the request and forwards it to
 * `synthesizeSpeech`.
 *
 * The body must be a JSON object with string fields `model` and `input`;
 * `voice` is optional and is treated as an empty string when absent or not a
 * string.
 *
 * @param req Incoming HTTP request.
 * @returns 405 for non-POST methods, 400 for a wrong Content-Type, malformed
 *   JSON or missing/non-string `model`/`input`; otherwise whatever
 *   `synthesizeSpeech` returns.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
    if (req.method !== "POST") {
        return new Response("Method Not Allowed", {status: 405});
    }
    const invalidContentType = validateContentType(req, "application/json");
    if (invalidContentType) return invalidContentType;

    // A malformed body is a client error, not an internal one.
    let body: unknown;
    try {
        body = await req.json();
    } catch {
        console.log("Request body is not valid JSON");
        return new Response("Bad Request", {status: 400});
    }
    if (typeof body !== "object" || body === null) {
        console.log("Request body is not a JSON object");
        return new Response("Bad Request", {status: 400});
    }

    const {model, input, voice} = body as Record<string, unknown>;
    if (typeof model !== "string" || typeof input !== "string") {
        console.log("Request body must contain string fields 'model' and 'input'");
        return new Response("Bad Request", {status: 400});
    }

    // An empty voice string parses to the default rate/pitch downstream.
    return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}


/**
 * Serves the demo page: a single static HTML document (inline CSS and
 * script) returned with `Content-Type: text/html`.
 *
 * The `req` parameter is not read by this function.
 *
 * What the embedded browser script does:
 * - on load, fetches `/v1/audio/models` and fills the `voiceSelect` dropdown,
 *   pre-selecting the entry at index 1;
 * - on button click, POSTs `{model, input, voice}` as JSON to
 *   `/v1/audio/speech` with a fixed `rate:0.0|pitch:0.0` voice string, then
 *   plays the returned blob and adds a download link for it;
 * - fills the "api" section with curl examples built from
 *   `window.location.origin`.
 *
 * The page is one template literal, so backticks and `${` that belong to the
 * browser script are escaped (`\`` and `\${`), and backslashes in the curl
 * examples are escaped for both this template and the browser-side one.
 * Any edit inside the literal changes the bytes sent to the client.
 *
 * NOTE(review): the browser script does not check `response.ok` before
 * turning the speech response into a blob, so an error response would be
 * handed to the audio player — confirm whether that is acceptable.
 * NOTE(review): object URLs created with `URL.createObjectURL` are never
 * revoked in the script — confirm whether repeated synthesis should free them.
 */
async function handleDemoRequest(req: Request) {
    const html = `<!DOCTYPE html>
  <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta content="width=device-width, initial-scale=1.0" name="viewport" />
    <title>tts</title>
    <style>
  body {
    background-color: #121212;
    color: #e0e0e0;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
  }

  .container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    background-color: #1e1e1e;
    border-radius: 8px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
  }

  .input-area, .output-area {
    margin-bottom: 20px;
  }

  .slider-container, .textarea-container, .dropdown-container {
    margin-bottom: 20px;
  }

  label {
    display: block;
    margin-bottom: 8px;
    font-weight: bold;
  }

  input[type="range"] {
    width: 100%;
  }

  .slider-value {
    text-align: center;
    margin-top: 8px;
  }

  textarea {
    max-width: 780px;
    width: calc(100% - 20px);
    height: 100px;
    padding: 10px;
    border: 1px solid #333;
    border-radius: 4px;
    background-color: #2e2e2e;
    color: #e0e0e0;
    resize: none;
    }

  select {
    width: 100%;
    padding: 10px;
    border: 1px solid #333;
    border-radius: 4px;
    background-color: #2e2e2e;
    color: #e0e0e0;
  }

  button {
    width: 100%;
    padding: 10px;
    border: none;
    border-radius: 4px;
    background-color: #6200ea;
    color: #fff;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.3s;
  }

  button:hover {
    background-color: #3700b3;
  }

  h1 {
    font-size: 24px;
    margin-bottom: 20px;
  }

  a {
    color: #bb86fc;
    text-decoration: none;
  }

  a:hover {
    text-decoration: underline;
  }

  #audioPlayerContainer {
    text-align: center;
  }

  audio {
    width: 100%;
    max-width: 600px;
    margin: 10px 0;
  }

  a {
    display: block;
    margin: 10px 0;
  }
  pre {
      color: #94c890;
      background: #000000;
      padding: 5px 10px;
      margin: 0;
      font-size: 1.12em;
  }
</style>

  </head>
  <body>
    <div class="container">
      <div class="input-area">
        <div class="textarea-container">
          <label for="inputText">текст:</label
          ><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
        </div>
        <div class="dropdown-container">
          <label for="voiceSelect">голос:</label>
          <select id="voiceSelect"></select>
        </div>
        <button id="synthesizeButton">синтезировать</button>
      </div>
      <div class="output-area">
        <div id="audioPlayerContainer"></div>
      </div>
      <details>
        <summary>api</summary>
        <p>получить список голосов:</p>
        <pre id="apiVoices"></pre>
        <p>post-запрос для синтеза голоса из текста:</p>
        <pre id="apiExamples"></pre>
      </details>
    </div>
 <script>
    let audio = null;
    
    document.getElementById('synthesizeButton').addEventListener('click', () => {
        const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
        const rate = '0.0';
        const pitch = '0.0';
        const voice = \`rate:\${rate}|pitch:\${pitch}\`;
        const model = document.getElementById('voiceSelect').value;

        if (audio) {
            audio.pause();
            audio.currentTime = 0;
        }

        fetch('/v1/audio/speech', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model, input: text, voice })
        })
        .then(response => response.blob())
        .then(blob => {
        const audioUrl = URL.createObjectURL(blob);
        const audioPlayerContainer = document.getElementById('audioPlayerContainer');
    
        if (audio) {
            audio.pause();
            audioPlayerContainer.innerHTML = '';
        }
    
        audio = new Audio(audioUrl);
        audio.controls = true;
        audioPlayerContainer.appendChild(audio);
    
        const downloadLink = document.createElement('a');
        downloadLink.href = audioUrl;
        downloadLink.download = 'synthesized_voice.mp3';
        downloadLink.textContent = 'скачать аудио';
        downloadLink.style.display = 'block';
        downloadLink.style.marginTop = '10px';
    
        audioPlayerContainer.appendChild(downloadLink);
        audio.play();
    });

    });
    
   async function fetchModels() {
        try {
          const response = await fetch('/v1/audio/models');
          const models = await response.json();
          const voiceSelect = document.getElementById('voiceSelect');

         models.forEach((model, index) => {
            const option = document.createElement('option');
            option.value = model.model;
            option.textContent = model.model;
            if (index === 1) {option.selected = true;}
            voiceSelect.appendChild(option);
          });
        } catch (error) {
          console.error('ошибка при получении списка моделей:', error);
        }
      }
      fetchModels();
      
      function createApiExamples() {
          const apiExamples = document.getElementById('apiExamples');
          const apiVoices = document.getElementById('apiVoices');
          const currentUrl = window.location.origin;
          const voices_pre = \`curl \${currentUrl}/v1/audio/models\`;
          const examples_pre = \`curl \${currentUrl}/v1/audio/speech \\\\\\\\
-H 'content-type: application/json' \\\\\\\\
--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0|pitch:0"}' \\\\\\\\
-o tts_voice.mp3
\`;
          apiVoices.textContent = voices_pre.replace(/\\\\\\\\/g, '\\\\');
          apiExamples.textContent = examples_pre.replace(/\\\\\\\\/g, '\\\\');
          
        }
   createApiExamples();
 </script>
 </body></html>`;

    // The document declares its own encoding via <meta charset="UTF-8">.
    return new Response(html, {
        headers: {"Content-Type": "text/html"},
    });
}

/**
 * Returns the catalogue of short voice aliases as a JSON array of
 * `{model, gender}` objects. Male voices are listed first, then female
 * voices; within each group the catalogue order is kept.
 */
async function handleVoiceList() {
    // [alias, gender] pairs in catalogue order.
    const catalogue: ReadonlyArray<readonly [string, string]> = [
        ["ava", "female"],
        ["andrew", "male"],
        ["emma", "female"],
        ["brian", "male"],
        ["vivienne", "female"],
        ["remy", "male"],
        ["seraphina", "female"],
        ["florian", "male"],
        ["dmitry", "male"],
        ["svetlana", "female"],
    ];

    const withGender = (wanted: string) =>
        catalogue
            .filter(([, gender]) => gender === wanted)
            .map(([model, gender]) => ({model, gender}));

    // Partitioning by gender gives the same order as a stable male-first sort.
    const payload = JSON.stringify([...withGender("male"), ...withGender("female")]);

    return new Response(payload, {
        headers: {"Content-Type": "application/json"},
    });
}


// HTTP entry point: routes each request by pathname and listens on port 7860.
//
//   /                 -> demo page
//   /v1/audio/models  -> voice list (JSON)
//   /tts              -> fixed debug synthesis
//   /v1/audio/speech  -> synthesis API
//   anything else     -> 404
//
// Every handler is awaited inside the try block. Returning the promise
// without `await` would let a rejection escape the catch below, so the
// custom 500 response would never be produced.
serve(async (req: Request): Promise<Response> => {
    try {
        const {pathname} = new URL(req.url);

        switch (pathname) {
            case "/":
                return await handleDemoRequest(req);
            case "/v1/audio/models":
                return await handleVoiceList();
            case "/tts":
                return await handleDebugRequest();
            case "/v1/audio/speech":
                return await handleSynthesisRequest(req);
            default:
                console.log(`Unhandled path ${pathname}`);
                return new Response("Not Found", {status: 404});
        }
    } catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        const message = err instanceof Error ? err.message : String(err);
        console.error(`Error processing request: ${message}`);
        // NOTE(review): the error message is echoed to the client, as before;
        // confirm that exposing internal error text is acceptable.
        return new Response(`Internal Server Error\n${message}`, {
            status: 500,
        });
    }
}, { port: 7860 });