"Dal testo al parlato" mediante le funzioni esterne del API di Pico TTS

Da Gambas-it.org - Wikipedia.
Versione del 19 set 2013 alle 17:55 di Vuott (Discussione | contributi) (Creata pagina con 'La tecnologia di sinstesi vocale '''TTS''' (''Text-To-Speech''), ossia "''dal Testo al Parlato''", consente di leggere con una voce umana sintetizzata un testo scritto, riprod...')

(diff) ← Versione meno recente | Versione attuale (diff) | Versione più recente → (diff)

La tecnologia di sintesi vocale TTS (Text-To-Speech), ossia "dal Testo al Parlato", consente di leggere con una voce umana sintetizzata un testo scritto, riproducendo così i suoni corrispondenti al testo.

Le risorse della libreria di Pico TTS consentono di ottenere tale risultato, ed in particolare di leggere testo in sei lingue diverse: inglese americano, inglese britannico, francese, spagnolo, tedesco ed italiano.

Per poter utilizzare tali risorse di Pico TTS bisognerà installare nel proprio sistema la libreria: libttspico.so.0.0.0 .


Mostriamo di seguito un semplice esempio, nel quale si sottoporrà alla conversione di sintesi vocale un breve testo in italiano. In particolare il codice salverà il parlato sintetizzato in un file WAV e, utilizzando le funzioni fondamentali di ALSA, consentirà di ascoltare immediatamente quanto scritto nel testo.

' Constants used with the Pico TTS library.
' They are typed Integer because the Gambas documentation lists only Boolean,
' Integer, Long, Float and String as datatypes allowed for a constant.

' Size in bytes announced to pico_getData() for its output buffer
Private Const MAX_OUTBUF_SIZE As Integer = 128
' Size in bytes of the memory area handed to pico_initialize()
Private Const PICO_MEM_SIZE As Integer = 2500000
Private Const PICO_MAX_DATAPATH_NAME_SIZE As Integer = 128
Private Const PICO_MAX_FILE_NAME_SIZE As Integer = 64
' Directory prepended to the lingware file names
Private Const PICO_LINGWARE_PATH As String = "/usr/share/pico/lang/"
' Size in bytes of the buffers that receive a resource name
Private Const PICO_MAX_RESOURCE_NAME_SIZE As Integer = 32
' Name given to the voice definition
Private Const PICO_VOICE_NAME As String = "PicoVoice"
Private Const PICO_STEP_IDLE As Integer = 200               ' No more input text available in the Pico text input buffer
Private Const PICO_STEP_BUSY As Integer = 201               ' Process input and producing speech output
Private Const PICO_STEP_ERROR As Integer = -200
Private Const PICO_EXC_UNEXPECTED_FILE_TYPE As Integer = -41
' Sample frequency passed to picoos_sdfOpenOut()
Private Const SAMPLE_FREQ_16KHZ As Integer = 16000
Private Const FORMAT_TAG_LIN As Integer = 1                 ' linear 16 - bit encoding


' Text-analysis lingware file names, one per language.
' The index used by Form_Open selects the synthesis language (5 = it-IT).
Private picoInternalTaLingware As String[] = ["en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin"]
' Signal-generation lingware file names, listed in the same language order as the array above.
Private picoInternalSgLingware As String[] = ["en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin"]

''''''''''''''''''''
' Constants used with ALSA.
' They are typed Integer because the Gambas documentation lists only Boolean,
' Integer, Long, Float and String as datatypes allowed for a constant.

' Stream direction passed to snd_pcm_open()
Private Const SND_PCM_STREAM_PLAYBACK As Integer = 0
' Access type passed to snd_pcm_set_params()
Private Const SND_PCM_ACCESS_RW_INTERLEAVED As Integer = 3
' Name of the PCM device opened by snd_pcm_open()
Private Const nomen As String = "default"
''''''''''''''''''''


' External functions of the Pico TTS shared library.
' Every Gambas declaration is preceded by the C prototype it maps.
Library "libttspico:0.0.0"

' int pico_initialize(void *memory, const pico_Uint32 size, pico_System *outSystem)
' Initializes the Pico system and returns its handle in 'outSystem'.
Private Extern pico_initialize(memP As Pointer, size As Integer, outSystem As Pointer) As Integer

' int pico_loadResource(pico_System system, const pico_Char *resourceFileName, pico_Resource *outResource)
' Loads a resource file into the Pico system.
Private Extern pico_loadResource(pico_System As Pointer, resFileName$ As String, pico_Resource As Pointer) As Integer

' int pico_getResourceName(pico_System system, pico_Resource resource, pico_Retstring outName)
' Gets the unique resource name of a loaded resource.
Private Extern pico_getResourceName(pico_System As Pointer, pico_Resource As Pointer, outName As Pointer) As Integer

' int pico_createVoiceDefinition(pico_System system, const pico_Char *voiceName)
' Creates a voice definition.
Private Extern pico_createVoiceDefinition(pico_System As Pointer, voiceName As String) As Integer

' int pico_addResourceToVoiceDefinition(pico_System system, const pico_Char *voiceName, const pico_Char *resourceName)
' Adds a mapping pair ('voiceName', 'resourceName') to the voice definition.
Private Extern pico_addResourceToVoiceDefinition(pico_System As Pointer, voiceName As String, resourceName As Pointer) As Integer

' int pico_newEngine(pico_System system, const pico_Char *voiceName, pico_Engine *outEngine)
' Creates and initializes a new Pico engine instance and returns its handle in 'outEngine'.
Private Extern pico_newEngine(pico_System As Pointer, voiceName As String, outEngine As Pointer) As Integer

' picoos_Common pico_sysGetCommon(pico_System this)
' Returns the 'common' handle required by the picoos_sdf* functions.
Private Extern pico_sysGetCommon(pico_System As Pointer) As Pointer

' picoos_bool picoos_sdfOpenOut (picoos_Common g, picoos_SDFile * sdFile, picoos_char fileName[], int sf, picoos_encoding_t enc)
' Opens the output sample file. 'enc' is a C enum, hence it is passed as an Integer.
Private Extern picoos_sdfOpenOut(commP As Pointer, sdFile As Pointer, fileName As Pointer, sf As Integer, enc As Integer) As Boolean

' picoos_bool picoos_sdfCloseOut (picoos_Common g, picoos_SDFile * sdFile)
' Closes the output sample file.
Private Extern picoos_sdfCloseOut(commP As Pointer, sdFile As Pointer) As Boolean

' int pico_putTextUtf8(pico_Engine engine, const pico_Char *text, const pico_Int16 textSize, pico_Int16 *outBytesPut)
' Puts text 'text' encoded in UTF8 into the Pico text input buffer
Private Extern pico_putTextUtf8(outEngine As Pointer, texP As Pointer, textsize As Short, outBytesPut As Pointer) As Integer

' int pico_getData(pico_Engine engine, void *outBuffer, const pico_Int16 bufferSize, pico_Int16 *outBytesReceived, pico_Int16 *outDataType)
' Gets speech data from the engine
Private Extern pico_getData(engine As Pointer, outBuffer As Pointer, bufferSize As Short, outBytesReceived As Pointer, outDataType As Pointer) As Integer

' picoos_bool picoos_sdfPutSamples (picoos_SDFile sdFile, picoos_uint32 nrSamples, picoos_int16 samples[])
' Appends 'nrSamples' 16-bit samples to the output sample file.
' 'nrSamples' is a 32-bit unsigned value in C, hence it is declared as Integer and not as Byte.
Private Extern picoos_sdfPutSamples(sdFile As Pointer, nrSamples As Integer, samples As Pointer) As Boolean

' pico_Status pico_disposeEngine(pico_System system, pico_Engine *inoutEngine)
' Disposes a Pico engine and releases all memory it occupied.
Private Extern pico_disposeEngine(pico_System As Pointer, inoutEngine As Pointer) As Integer

' pico_Status pico_releaseVoiceDefinition(pico_System system, const pico_Char *voiceName)
' Releases the voice definition 'voiceName'.
Private Extern pico_releaseVoiceDefinition(pico_System As Pointer, voiceName As String) As Integer

' pico_Status pico_unloadResource(pico_System system, pico_Resource *inoutResource)
' Unloads a resource file from the Pico system.
Private Extern pico_unloadResource(pico_System As Pointer, inoutResource As Pointer) As Integer

' pico_Status pico_terminate(pico_System *system)
' Terminates the Pico system.
Private Extern pico_terminate(pico_System As Pointer) As Integer

'''''''''''''''''''''''''''''''''''''''''''''''''''''


' External functions of the ALSA shared library.
' Every Gambas declaration is preceded by the C prototype it maps.
Library "libasound:2"

' int snd_pcm_open(snd_pcm_t **pcm, const char *name, snd_pcm_stream_t stream, int mode)
' Opens the PCM device 'name' and stores its handle in '*pcm'.
Private Extern snd_pcm_open(pcmP As Pointer, nome As String, stream As Integer, mode As Integer) As Integer

' const char * snd_strerror (int errnum)
' Returns the message for an error code.
Private Extern snd_strerror(errnum As Integer) As String

' int snd_pcm_set_params (snd_pcm_t *pcm, snd_pcm_format_t format, snd_pcm_access_t access, unsigned int channels, unsigned int rate, int soft_resample, unsigned int latency)
' Sets the hardware and software parameters.
' 'format' and 'access' are C enums, hence they are passed as Integer.
Private Extern snd_pcm_set_params(pcmP As Pointer, formatI As Integer, accessI As Integer, canali As Integer, rate As Integer, soft_resample As Integer, latency As Integer) As Integer

' snd_pcm_sframes_t snd_pcm_writei(snd_pcm_t *pcm, const void *buffer, snd_pcm_uframes_t size)
' Writes interleaved frames to the PCM device.
' 'size' is expressed in frames, not in bytes: one frame holds one sample for each channel,
' so its size in bytes is (number of channels * bytes per sample).
' Returns the number of frames written, or a negative error code.
Private Extern snd_pcm_writei(pcmP As Pointer, buffP As Pointer, uframes As Long) As Integer

' int snd_pcm_start(snd_pcm_t *pcm)
' Starts the PCM device.
Private Extern snd_pcm_start(pcmP As Pointer) As Integer

' int snd_pcm_close(snd_pcm_t *pcm)
' Closes the PCM handle.
Private Extern snd_pcm_close(pcmP As Pointer)

'''''''''''''''''''''''''''''''''''''''''''''''''''''


' External function of the standard C library.
Library "libc:6"

' void *memcpy(void *dest, const void *src, size_t n)
' Copies 'n' bytes from the memory area 'src' to the memory area 'dest'.
Private Extern memcpy(dest As Pointer, src As Pointer, n As Integer)


' Synthesizes a short Italian sentence with Pico TTS. The speech samples are
' written to a WAV file and, at the same time, sent to ALSA for immediate playback.
'
' Raises an error (Error.Raise) when ALSA cannot be opened, configured or written to,
' and when a Pico set-up call returns a negative status.
' A failure while creating the WAV file or during the synthesis itself is reported
' on the standard error output; the allocated resources are then released normally.
Public Sub Form_Open()

  Dim testo, local_text, picoMemArea, picoSystem As Pointer
  Dim picoTaResource, picoSgResource, picoTaResourceName As Pointer
  Dim picoSgResourceName, picoEngine, common, outBytesPut, outbuf As Pointer
  Dim handle, wav, sdOutFile As Pointer
  Dim resourceFileName, picoSgFileName As String
  Dim ret, getstatus, err, frames As Integer
  Dim bytes_sent, bytes_recv, text_remaining, out_data_type As Short
  Dim risolBit As Integer = 2                         ' 2 = SND_PCM_FORMAT_S16_LE in the ALSA headers
  Dim frequenza As Integer = SAMPLE_FREQ_16KHZ        ' same rate as the one declared for the WAV file
  Dim canali As Integer = 1                           ' Pico produces a single (mono) channel


' Open the ALSA PCM device and set the playback parameters:
  err = snd_pcm_open(VarPtr(handle), nomen, SND_PCM_STREAM_PLAYBACK, 0)
  If err < 0 Then Error.Raise("Playback open error: " & snd_strerror(err))

  err = snd_pcm_set_params(handle, risolBit, SND_PCM_ACCESS_RW_INTERLEAVED, canali, frequenza, 1, 500000)
  If err < 0 Then Error.Raise("Playback set params error: " & snd_strerror(err))


' Text to be synthesized:
  testo = Alloc("www.gambas-it.org è il sito della comunità italiana dei programmatori Gambas.")

' The output buffer must really be as large as the size announced to pico_getData():
  outbuf = Alloc(MAX_OUTBUF_SIZE)

  picoMemArea = Alloc(PICO_MEM_SIZE)

' Initialize the 'libttspico' library:
  ret = pico_initialize(picoMemArea, PICO_MEM_SIZE, VarPtr(picoSystem))
  If ret < 0 Then Error.Raise("Impossibile inizializzare la libreria 'libttspico' !")

' Load the text-analysis lingware resource file.
' To change the synthesis language, change the index of "picoInternalTaLingware[]":
  resourceFileName = PICO_LINGWARE_PATH & picoInternalTaLingware[5]
  ret = pico_loadResource(picoSystem, resourceFileName, VarPtr(picoTaResource))
  CheckPico(ret, "pico_loadResource")

' Load the signal-generation lingware resource file.
' To change the synthesis language, change the index of "picoInternalSgLingware[]":
  picoSgFileName = PICO_LINGWARE_PATH & picoInternalSgLingware[5]
  ret = pico_loadResource(picoSystem, picoSgFileName, VarPtr(picoSgResource))
  CheckPico(ret, "pico_loadResource")

' Get the name of the text-analysis resource:
  picoTaResourceName = Alloc(PICO_MAX_RESOURCE_NAME_SIZE)
  ret = pico_getResourceName(picoSystem, picoTaResource, picoTaResourceName)
  CheckPico(ret, "pico_getResourceName")

' Get the name of the signal-generation resource:
  picoSgResourceName = Alloc(PICO_MAX_RESOURCE_NAME_SIZE)
  ret = pico_getResourceName(picoSystem, picoSgResource, picoSgResourceName)
  CheckPico(ret, "pico_getResourceName")

' Create a voice definition and add both resources to it:
  ret = pico_createVoiceDefinition(picoSystem, PICO_VOICE_NAME)
  CheckPico(ret, "pico_createVoiceDefinition")

  ret = pico_addResourceToVoiceDefinition(picoSystem, PICO_VOICE_NAME, picoTaResourceName)
  CheckPico(ret, "pico_addResourceToVoiceDefinition")

  ret = pico_addResourceToVoiceDefinition(picoSystem, PICO_VOICE_NAME, picoSgResourceName)
  CheckPico(ret, "pico_addResourceToVoiceDefinition")

' Create a new Pico engine:
  ret = pico_newEngine(picoSystem, PICO_VOICE_NAME, VarPtr(picoEngine))
  CheckPico(ret, "pico_newEngine")


' Path and name of the WAV file to be saved:
  wav = Alloc("percorso/del/file.wav")

' 'local_text' walks through the text, 'testo' keeps the start of the block for Free():
  local_text = testo

' The terminating zero byte is sent to the engine too:
  text_remaining = Len(String@(local_text)) + 1

  common = pico_sysGetCommon(picoSystem)

' Open the WAV file. When it cannot be created the text is still played through ALSA:
  picoos_sdfOpenOut(common, VarPtr(sdOutFile), wav, SAMPLE_FREQ_16KHZ, FORMAT_TAG_LIN)
  If IsNull(sdOutFile) Then Error "Impossibile creare il file wav: il testo verrà soltanto riprodotto."

  outBytesPut = Alloc(SizeOf(gb.Short))

  While text_remaining > 0

' Feed the Pico engine with the part of the text not sent yet:
    ret = pico_putTextUtf8(picoEngine, local_text, text_remaining, outBytesPut)
    If ret < 0 Then
' Leaving the loop here avoids spinning forever on a text that is never consumed:
      Error "Errore alla funzione 'pico_putTextUtf8' !"
      Break
    Endif

    bytes_sent = Short@(outBytesPut)
    text_remaining -= bytes_sent
    local_text = local_text + bytes_sent

' Fetch the audio samples until the engine has nothing more to produce:
    Do

      getstatus = pico_getData(picoEngine, outbuf, MAX_OUTBUF_SIZE, VarPtr(bytes_recv), VarPtr(out_data_type))

      If bytes_recv > 0 Then

' Each chunk is stored as soon as it is received, so that the last one is not lost:
        If Not IsNull(sdOutFile) Then picoos_sdfPutSamples(sdOutFile, bytes_recv \ SizeOf(gb.Short), outbuf)

' The same data are sent to ALSA; with one 16-bit channel a frame is one sample:
        frames = snd_pcm_writei(handle, outbuf, bytes_recv \ SizeOf(gb.Short))
        If frames < 0 Then Error.Raise("Errore alla funzione 'snd_pcm_writei': " & snd_strerror(frames))

      Endif

    Loop While getstatus = PICO_STEP_BUSY

    If getstatus = PICO_STEP_ERROR Then
      Error "Errore alla funzione 'pico_getData' !"
      Break
    Endif

  Wend

' Close the WAV file:
  If Not IsNull(sdOutFile) Then picoos_sdfCloseOut(common, VarPtr(sdOutFile))

' Close the ALSA handle:
  snd_pcm_close(handle)


' Finally release every resource of the "libttspico" library:

  If IsNull(picoEngine) = False Then
    pico_disposeEngine(picoSystem, VarPtr(picoEngine))
    pico_releaseVoiceDefinition(picoSystem, PICO_VOICE_NAME)
    picoEngine = Null
  Endif

  If IsNull(picoSgResource) = False Then
    pico_unloadResource(picoSystem, VarPtr(picoSgResource))
    picoSgResource = Null
  Endif

  If IsNull(picoTaResource) = False Then
    pico_unloadResource(picoSystem, VarPtr(picoTaResource))
    picoTaResource = Null
  Endif

  If IsNull(picoSystem) = False Then
    pico_terminate(VarPtr(picoSystem))
    picoSystem = Null
  Endif

' Free the allocated memory:
  Free(outBytesPut)
  Free(wav)
  Free(picoMemArea)
  Free(picoTaResourceName)
  Free(picoSgResourceName)
  Free(outbuf)
  Free(testo)

End

' Raises an error naming the Pico function 'sFunzione' when its status 'iStato' is negative.
Private Sub CheckPico(iStato As Integer, sFunzione As String)

  If iStato < 0 Then Error.Raise("Errore alla funzione '" & sFunzione & "' !")

End


Riferimenti