"Dal testo al parlato" mediante le funzioni esterne dell'API di Pico TTS

Da Gambas-it.org - Wikipedia.

La tecnologia di sintesi vocale TTS (Text-To-Speech), ossia "dal Testo al Parlato", consente di leggere con una voce umana sintetizzata un testo scritto, riproducendo così i suoni corrispondenti al testo.

Le risorse della libreria di Pico TTS consentono di ottenere tale risultato, ed in particolare di leggere testi in sei lingue diverse: inglese americano, inglese britannico, tedesco, spagnolo, francese ed italiano.

Per poter utilizzare tali risorse di Pico TTS bisognerà installare nel proprio sistema la libreria: libttspico.so.0.0.0 .


Mostriamo di seguito un semplice esempio, nel quale si sottoporrà alla conversione di sintesi vocale un breve testo in italiano. In particolare il codice riprodurrà vocalmente il testo in un file WAV, nonché, utilizzando le fondamentali funzioni di ALSA, consentirà di ascoltare immediatamente quanto scritto nel testo.

' ---- Pico TTS constants ----------------------------------------------------
' Buffer size, in bytes, that Main passes to pico_getData() as 'bufferSize'.
Private Const MAX_OUTBUF_SIZE As Byte = 128
' Size, in bytes, of the memory area that Main allocates and hands to pico_initialize().
Private Const PICO_MEM_SIZE As Integer = 2500000
Private Const PICO_MAX_DATAPATH_NAME_SIZE As Byte = 128
Private Const PICO_MAX_FILE_NAME_SIZE As Byte = 64
' Directory prefix prepended to the lingware file names listed below.
Private Const PICO_LINGWARE_PATH As String = "/usr/share/pico/lang/"
' Size, in bytes, of the buffers Main allocates to receive resource names.
Private Const PICO_MAX_RESOURCE_NAME_SIZE As Byte = 32
' Name under which Main creates the voice definition and the engine.
Private Const PICO_VOICE_NAME As String = "PicoVoice"
Private Const PICO_STEP_IDLE As Byte = 200                  ' No more input text available in the Pico text input buffer
Private Const PICO_STEP_BUSY As Byte = 201                  ' Processing input and producing speech output
Private Const PICO_STEP_ERROR As Short = -200               ' Presumably the failure status of a processing step (per its name) - confirm in picoapi.h
Private Const PICO_EXC_UNEXPECTED_FILE_TYPE As Short = -41
' Sample rate passed to picoos_sdfOpenOut() for the output WAV file.
Private Const SAMPLE_FREQ_16KHZ As Integer = 16000
Private Const FORMAT_TAG_LIN As Byte = 1                    ' Linear 16-bit encoding

' ---- ALSA constants --------------------------------------------------------
' Values passed to snd_pcm_open() / snd_pcm_set_params() by Main.
Private Const SND_PCM_STREAM_PLAYBACK As Byte = 0
Private Const SND_PCM_FORMAT_S16_LE As Byte = 2
Private Const SND_PCM_ACCESS_RW_INTERLEAVED As Byte = 3
' Name of the PCM device passed to snd_pcm_open().
Private Const nomen As String = "default"

' ---- Lingware file tables --------------------------------------------------
' Both arrays are indexed in the same language order:
' 0 = en-US, 1 = en-GB, 2 = de-DE, 3 = es-ES, 4 = fr-FR, 5 = it-IT.
' "ta" files are loaded by Main as the text-analysis resource,
' "sg" files as the signal-generation resource.
Private picoInternalTaLingware As String[] = ["en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin"]
Private picoInternalSgLingware As String[] = ["en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin"]
 

' Every Extern declared from here up to the next 'Library' statement is resolved
' in the Pico TTS shared library. Each declaration is preceded by the C prototype
' it is meant to match.
Library "libttspico:0.0.0"

' int pico_initialize(void *memory, const pico_Uint32 size, pico_System *outSystem)
' Initializes the Pico system and returns its handle in 'outSystem'.
Private Extern pico_initialize(memP As Pointer, size As Integer, outSystem As Pointer) As Integer

' int pico_loadResource(pico_System system, const pico_Char *resourceFileName, pico_Resource *outResource)
' Loads a resource file into the Pico system.
Private Extern pico_loadResource(pico_System As Pointer, resFileName$ As String, pico_Resource As Pointer) As Integer

' int pico_getResourceName(pico_System system, pico_Resource resource, pico_Retstring outName)
' Gets the unique resource name of a loaded resource.
' 'outName' must point to a caller-allocated buffer that receives the name.
Private Extern pico_getResourceName(pico_System As Pointer, pico_Resource As Pointer, outName As Pointer) As Integer

' int pico_createVoiceDefinition(pico_System system, const pico_Char *voiceName)
' Creates a voice definition.
Private Extern pico_createVoiceDefinition(pico_System As Pointer, voiceName As String) As Integer

' int pico_addResourceToVoiceDefinition(pico_System system, const pico_Char *voiceName, const pico_Char *resourceName)
' Adds a mapping pair ('voiceName', 'resourceName') to the voice definition.
' 'resourceName' is declared as Pointer because Main passes the buffer filled by pico_getResourceName().
Private Extern pico_addResourceToVoiceDefinition(pico_System As Pointer, voiceName As String, resourceName As Pointer) As Integer

' int pico_newEngine(pico_System system, const pico_Char *voiceName, pico_Engine *outEngine)
' Creates and initializes a new Pico engine instance and returns its handle in 'outEngine'.
Private Extern pico_newEngine(pico_System As Pointer, voiceName As String, outEngine As Pointer) As Integer

' picoos_Common pico_sysGetCommon(pico_System this)
' Returns the 'common' handle of the Pico system; Main passes it to the picoos_sdf*() functions.
Private Extern pico_sysGetCommon(pico_System As Pointer) As Pointer

' picoos_bool picoos_sdfOpenOut (picoos_Common g, picoos_SDFile * sdFile, picoos_char fileName[], int sf, picoos_encoding_t enc)
' Opens the output sample-data (WAV) file 'fileName' and returns its handle in 'sdFile'.
' 'sf' is the sample frequency and 'enc' the sample encoding.
Private Extern picoos_sdfOpenOut(commP As Pointer, sdFile As Pointer, fileName As Pointer, sf As Integer, enc As Byte) as Boolean

' picoos_bool picoos_sdfCloseOut (picoos_Common g, picoos_SDFile * sdFile)
' Closes the output file opened with picoos_sdfOpenOut().
Private Extern picoos_sdfCloseOut(commP As Pointer, sdFile As Pointer) as Boolean

' int pico_putTextUtf8(pico_Engine engine, const pico_Char *text, const pico_Int16 textSize, pico_Int16 *outBytesPut)
' Puts text 'text' encoded in UTF8 into the Pico text input buffer.
' The number of bytes actually accepted is returned in 'outBytesPut'.
Private Extern pico_putTextUtf8(outEngine As Pointer, texP As Pointer, textsize As Short, outBytesPut As Pointer) As Integer

' int pico_getData(pico_Engine engine, void *outBuffer, const pico_Int16 bufferSize, pico_Int16 *outBytesReceived, pico_Int16 *outDataType)
' Gets speech data from the engine.
' The return value is a step status (see the PICO_STEP_* constants).
Private Extern pico_getData(engine As Pointer, outBuffer As Pointer, bufferSize As Short, outBytesReceived As Pointer, outDataType As Pointer) As Integer

' pico_disposeEngine(pico_System system, pico_Engine *inoutEngine)
' Disposes a Pico engine and releases all memory it occupied.
Private Extern pico_disposeEngine(pico_System As Pointer, inoutEngine As Pointer)

' pico_releaseVoiceDefinition(pico_System system, const pico_Char *voiceName)
' Releases the voice definition 'voiceName'.
Private Extern pico_releaseVoiceDefinition(pico_System As Pointer, voiceName As String)

' pico_unloadResource(pico_System system, pico_Resource *inoutResource)
' Unloads a resource file from the Pico system.
Private Extern pico_unloadResource(pico_System As Pointer, inoutResource As Pointer)

' pico_terminate(pico_System *system)
' Terminates the Pico system.
Private Extern pico_terminate(pico_System As Pointer)


' Every Extern declared below is resolved in the ALSA shared library.
Library "libasound:2"

' int snd_pcm_open(snd_pcm_t **pcm, const char *name, snd_pcm_stream_t stream, int mode)
' Opens a PCM and returns its handle in 'pcm'.
Private Extern snd_pcm_open(pcmP As Pointer, nome As String, stream As Integer, mode As Integer) As Integer

' const char * snd_strerror (int errnum)
' Returns the message for an error code.
Private Extern snd_strerror(errnum As Integer) As String

' int snd_pcm_set_params (snd_pcm_t *pcm, snd_pcm_format_t format, snd_pcm_access_t access, unsigned int channels, unsigned int rate, int soft_resample, unsigned int latency)
' Sets the hardware and software parameters in a single call.
Private Extern snd_pcm_set_params(pcmP As Pointer, formatB As Byte, accessB As Byte, canali As Integer, rate As Integer, soft_resample As Integer, latency As Integer) As Integer

' snd_pcm_sframes_t snd_pcm_writei(snd_pcm_t *pcm, const void *buffer, snd_pcm_uframes_t size)
' Writes interleaved frames to a PCM. 'size' is expressed in frames, not in bytes.
' A negative return value is an error code usable with snd_strerror().
Private Extern snd_pcm_writei(pcmP As Pointer, buffP As Pointer, uframes As Long) As Integer

' int snd_pcm_close(snd_pcm_t *pcm)
' Closes the PCM handle (the handle itself is passed, not its address).
Private Extern snd_pcm_close(pcmP As Pointer)
 

' picoos_bool picoos_sdfPutSamples (picoos_SDFile sdFile, picoos_uint32 nrSamples, picoos_int16 samples[])
' Appends 'nrSamples' 16-bit samples to an output file opened with picoos_sdfOpenOut().
' Declared here with an explicit 'In' clause so that it does not depend on the
' 'Library' statement currently in effect.
Private Extern picoos_sdfPutSamples(sdFile As Pointer, nrSamples As Integer, samples As Pointer) As Boolean In "libttspico:0.0.0"

' int snd_pcm_drain(snd_pcm_t *pcm)
' Blocks until every frame still pending in the PCM buffer has been played.
Private Extern snd_pcm_drain(pcmP As Pointer) As Integer In "libasound:2"


' Synthesizes a short Italian sentence with Pico TTS.
' The generated 16-bit mono samples are written to a WAV file and, at the same
' time, sent to the ALSA "default" device for immediate playback.
' Raises an error (Error.Raise) when ALSA or a Pico call fails. If only the WAV
' file cannot be created, a warning is printed and playback goes on without it.
Public Sub Main()

  Dim local_text, picoMemArea, picoSystem As Pointer
  Dim picoTaResource, picoSgResource, picoTaResourceName As Pointer
  Dim picoSgResourceName, picoEngine, common, outBytesPut As Pointer
  Dim testo, wav, sdOutFile, outbuf, handle As Pointer
  Dim resourceFileName, picoSgFileName As String
  Dim ret, getstatus, err, obr, frames, letti As Integer
  Dim bytes_sent, bytes_recv, text_remaining, out_data_type As Short
  Dim wavAperto As Boolean
' Pico produces mono 16 kHz signed 16-bit samples: ALSA is configured to match,
' so that every sample is played in sequence on a single channel.
  Dim frequenza As Integer = SAMPLE_FREQ_16KHZ
  Dim canali As Byte = 1

' Open the ALSA PCM sub-system and set the playback parameters:
  err = snd_pcm_open(VarPtr(handle), nomen, SND_PCM_STREAM_PLAYBACK, 0)
  If err < 0 Then Error.Raise("Errore nell'apertura dell'interfaccia PCM di Alsa: " & snd_strerror(err))

  err = snd_pcm_set_params(handle, SND_PCM_FORMAT_S16_LE, SND_PCM_ACCESS_RW_INTERLEAVED, canali, frequenza, 1, 500000)
  If err < 0 Then Error.Raise("Errore nell'impostazione dei parametri dell'interfaccia PCM di Alsa: " & snd_strerror(err))

' Text to be synthesized:
  testo = Alloc("www.gambas-it.org è il sito della comunità italiana dei programmatori Gambas.")

' The output buffer must be as large as the size announced to pico_getData()
' (MAX_OUTBUF_SIZE bytes), otherwise the library writes past its end.
  outbuf = Alloc(CInt(MAX_OUTBUF_SIZE))

  picoMemArea = Alloc(PICO_MEM_SIZE)

' Initialize the 'libttspico' library:
  ret = pico_initialize(picoMemArea, PICO_MEM_SIZE, VarPtr(picoSystem))
  If ret < 0 Then Error.Raise("Impossibile inizializzare la libreria 'libttspico' !")

' Load the text-analysis lingware resource file.
' To change the synthesis language, change the index used on "picoInternalTaLingware[]":
  resourceFileName = PICO_LINGWARE_PATH & picoInternalTaLingware[5]

  ret = pico_loadResource(picoSystem, resourceFileName, VarPtr(picoTaResource))
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_loadResource' !")

' Load the signal-generation lingware resource file.
' To change the synthesis language, change the index used on "picoInternalSgLingware[]"
' (it must be the same index used for the text-analysis resource):
  picoSgFileName = PICO_LINGWARE_PATH & picoInternalSgLingware[5]

  ret = pico_loadResource(picoSystem, picoSgFileName, VarPtr(picoSgResource))
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_loadResource' !")

' Get the name of the text-analysis resource:
  picoTaResourceName = Alloc(PICO_MAX_RESOURCE_NAME_SIZE)

  ret = pico_getResourceName(picoSystem, picoTaResource, picoTaResourceName)
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_getResourceName' !")

' Get the name of the signal-generation resource:
  picoSgResourceName = Alloc(PICO_MAX_RESOURCE_NAME_SIZE)

  ret = pico_getResourceName(picoSystem, picoSgResource, picoSgResourceName)
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_getResourceName' !")

' Create a voice definition:
  ret = pico_createVoiceDefinition(picoSystem, PICO_VOICE_NAME)
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_createVoiceDefinition' !")

' Add the text-analysis resource to the voice:
  ret = pico_addResourceToVoiceDefinition(picoSystem, PICO_VOICE_NAME, picoTaResourceName)
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_addResourceToVoiceDefinition' !")

' Add the signal-generation resource to the voice:
  ret = pico_addResourceToVoiceDefinition(picoSystem, PICO_VOICE_NAME, picoSgResourceName)
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_addResourceToVoiceDefinition' !")

' Create a new Pico synthesis engine:
  ret = pico_newEngine(picoSystem, PICO_VOICE_NAME, VarPtr(picoEngine))
  If ret < 0 Then Error.Raise("Errore alla funzione 'pico_newEngine' !")

' Path and name of the WAV file that will be saved:
  wav = Alloc("/percorso/del/file.wav")

' 'local_text' is the read cursor inside 'testo'; it is advanced as Pico accepts text.
  local_text = testo

' Len() counts bytes; the "+ 1" also sends the terminating zero byte, as the
' original code did (NOTE(review): presumably what makes Pico flush the last
' sentence - confirm against picoapi.h).
  text_remaining = Len(String@(local_text)) + 1

  common = pico_sysGetCommon(picoSystem)

' Open the WAV file. A failure here is not fatal: playback goes on without saving.
  wavAperto = picoos_sdfOpenOut(common, VarPtr(sdOutFile), wav, SAMPLE_FREQ_16KHZ, FORMAT_TAG_LIN)
  If Not wavAperto Then Print #File.Err, "Impossibile aprire il file wav: " & String@(wav)

  outBytesPut = Alloc(SizeOf(gb.Short))

' Bits per second of the audio stream, used only to display the elapsed time:
  obr = frequenza * 16 * canali

  While text_remaining > 0

' Feed the synthesis engine with the text not yet accepted:
    ret = pico_putTextUtf8(picoEngine, local_text, text_remaining, outBytesPut)
    If ret < 0 Then Error.Raise("Errore alla funzione 'pico_putTextUtf8' !")
    bytes_sent = Short@(outBytesPut)
    text_remaining -= bytes_sent
' Move the cursor forward, so that the next call does not resend the same bytes:
    local_text = local_text + bytes_sent

' Retrieve the audio samples until the engine has nothing more to process:
    Do

      getstatus = pico_getData(picoEngine, outbuf, MAX_OUTBUF_SIZE, VarPtr(bytes_recv), VarPtr(out_data_type))
      If getstatus = PICO_STEP_ERROR Then Error.Raise("Errore alla funzione 'pico_getData' !")

      If bytes_recv > 0 Then

' Append the samples (2 bytes each) to the WAV file:
        If wavAperto Then picoos_sdfPutSamples(sdOutFile, bytes_recv \ SizeOf(gb.Short), outbuf)

' The same samples are sent to ALSA for immediate listening.
' snd_pcm_writei() wants a number of frames: one frame = 2 bytes * channels.
        frames = snd_pcm_writei(handle, outbuf, bytes_recv \ (SizeOf(gb.Short) * canali))
        If frames < 0 Then Error.Raise("Errore alla funzione 'snd_pcm_writei': " & snd_strerror(frames))

        letti += bytes_recv

      Endif

      Write #File.Out, "\rTempo trascorso: " & Date(0, 0, 0, 0, 0, 0, ((letti * 8) / obr) * 1000)

    Loop While PICO_STEP_BUSY = getstatus

  Wend

' Close the WAV file:
  If wavAperto Then picoos_sdfCloseOut(common, VarPtr(sdOutFile))

' Let ALSA play the frames still queued, then close its handle:
  snd_pcm_drain(handle)
  snd_pcm_close(handle)

' Finally release every resource of the "libttspico" library:
  If IsNull(picoEngine) = False Then
    pico_disposeEngine(picoSystem, VarPtr(picoEngine))
    pico_releaseVoiceDefinition(picoSystem, PICO_VOICE_NAME)
    picoEngine = Null
  Endif

  If IsNull(picoSgResource) = False Then
    pico_unloadResource(picoSystem, VarPtr(picoSgResource))
    picoSgResource = Null
  Endif

  If IsNull(picoTaResource) = False Then
    pico_unloadResource(picoSystem, VarPtr(picoTaResource))
    picoTaResource = Null
  Endif

  If IsNull(picoSystem) = False Then
    pico_terminate(VarPtr(picoSystem))
    picoSystem = Null
  Endif

' Free the allocated memory:
  Free(outBytesPut)
  Free(wav)
  Free(picoMemArea)
  Free(picoTaResourceName)
  Free(picoSgResourceName)
  Free(outbuf)
  Free(testo)

End



Riferimenti