Introduction

Welcome to the VocaliD SDK docs!

Sdk

Initialize Sdk

// Create the SDK instance; bearerToken is the OAuth2 bearer token.
VocalidSdk vocalidSdk = VocalidSdk.newInstance(context, bearerToken);

// Create a new SDK object; `token` is the OAuth2 bearer token.
let sdk = VocalidSdk.newInstance(bearerToken: token)

A bearer token (OAuth2) is required to initialise the VocalidSdk. To get a bearer token, please follow the API authentication docs.

Voices

Voice

// Accessors available on a Voice. Each one corresponds to the parameter of the
// same name in the table below; return values are discarded here for brevity.
Voice voice = new Voice();
voice.getCanStream();      // canStream
voice.getCanDownload();    // canDownload
voice.getDownloadToken();  // downloadToken
voice.getVoiceId();        // voiceId
voice.getLicensed();       // licensed
voice.getFilename();       // filename
voice.getModelname();      // modelname
voice.getName();           // name

/// Describes a single voice as seen by the Swift SDK.
public struct VoiceInfo {
    /// Checked before calling `downloadVoiceFile` / `downloadLicenseFile`.
    public var isDownloadable: Bool
    /// Presumably the Swift counterpart of `canStream` — TODO confirm.
    public var isStreamable: Bool
    /// File name of the voice, as passed to `setVoice(_:loadNow:)`.
    public var fileName: String
    /// Token used for downloading the voice, license file or generating synthesis.
    public var downloadToken: String
    /// Identifier of the voice (a UUID according to the parameter table).
    public var voiceId: String
    /// File name of the license, appended to the vocalid directory when loading it.
    public var licenseName: String
    /// Expected to be `true` once `downloadVoiceFile` has succeeded.
    public var isDownloaded: Bool
    /// Expected to be `true` once `downloadLicenseFile` has succeeded.
    public var isLicensed: Bool
}

Parameter	Description
canDownload	boolean indicating whether or not the voice can be downloaded via the API.
canStream	boolean indicating whether or not the voice can be used for synthesis via the API.
downloadToken	token used for downloading the voice, license file or generating synthesis.
expiration	the time after which the voice will no longer be available or null if the voice will never expire.
licensed	boolean indicating whether or not the voice is licensed for local synthesis. This value depends on a valid license file being loaded.
voiceId	the UUID of the voice.
filename	recommended filename when downloading voice locally.
modelname	type of voice model (Hi Res or Low Res).
name	name assigned to voice.

Get Api Voice List

// Fetch the list of voices available for synthesis and download.
ArrayList<Voice> voiceList = vocalidSdk.get_api_voices(bearerToken);

 // Ask the API for the available voices; the result arrives in the closure.
 sdk.getApiVoices { (infos, error) in
     // A failed request reports its reason through `error`.
     if let failure = error {
         print(failure.localizedDescription)
         return
     }
     // No error, but also no payload: nothing to work with.
     guard let fetchedVoices = infos else {
         print("no voice infos fetched")
         return
     }
     // use fetchedVoices here
 }

Query api for list of voices available for synthesis and download.

Download Voice

// Download the voice to a file on disk at filePath.
boolean success = vocalidSdk.download_voice_file(bearerToken, downloadToken, filePath);
// Or download the voice into a byte array.
byte[] voiceData = vocalidSdk.download_voice_data(bearerToken, downloadToken);


// Only attempt the download when the voice is marked as downloadable.
if voiceInfo.isDownloadable {
    sdk.downloadVoiceFile(voiceInfo: voiceInfo) { (success, error) in
        if success {
            print("success")
            // voiceInfo.isDownloaded should return true
        } else {
            // Unwrap the optional message so the log shows the text itself
            // rather than `Optional("…")` or `nil`.
            print(error?.localizedDescription ?? "unknown error")
        }
    }
}

Download voice files to disk or to a byte array using the voice's download token and the bearer token used during the API voice query.

File Info

Voice voice = vocalidsdk.get_voice_info(String path);

Query information about local voice files. Get information like voice id, expiration and if voice is licensed.

Licenses

Download License

// Download the license to a file on disk at filePath.
boolean success = vocalidSdk.download_license_file(bearerToken, downloadToken, filePath);
// Or download the license into a byte array.
// NOTE(review): download_voice_data takes no filePath argument — confirm whether
// filePath is really part of this signature.
byte[] licenseData = vocalidSdk.download_license_data(bearerToken, downloadToken, filePath);

// Only attempt the license download when the voice is marked as downloadable.
if voiceInfo.isDownloadable {
    sdk.downloadLicenseFile(voiceInfo: voiceInfo) { (success, error) in
        if success {
            print("success")
            // voiceInfo.isLicensed should return true
        } else {
            // Unwrap the optional message so the log shows the text itself
            // rather than `Optional("…")` or `nil`.
            print(error?.localizedDescription ?? "unknown error")
        }
    }
}

License files are required to access local synthesis. To download a license file for a voice, a bearer token and the download token of the voice are required.

Load License

// The two calls below are alternatives; use whichever matches how the license is stored.
// Load a license from a file path.
boolean success = vocalidSdk.load_license_file("/path/to/license.txt");
// Load a license from in-memory bytes.
boolean success = vocalidSdk.load_license_data(byteData);

// Obtain the synthesis engine; it is optional, so the call below does nothing when it is nil.
let engine = sdk.getEngine()
// Build the license path from the vocalid directory and the voice's license file name.
// NOTE(review): assumes vocalidDirectory ends with a path separator — confirm.
let licensePath = VOCDataStore.default.vocalidDirectory + voiceInfo.licenseName
engine?.loadLicense(licensePath)

License files must be loaded prior to performing local synthesis.

License File Info

// Read information about the local license file at path.
DrmBlock license = vocalidSdk.get_license_info(path);
// Accessors available on the result; return values are discarded here for brevity.
license.getExpired();
license.getExpirationString();
license.getVoiceId();

Query information about local license files. Get information like voice id and expiration.

Synthesis

Local Synthesis

// Describe the audio format to produce.
VocalidAudioFormat audioFormat = new VocalidAudioFormat(
        "pcm", // codec
        ""     // container
);
// Build the request; for local synthesis the fifth argument is the path
// to the downloaded voice file.
VocalidRequest vocalidRequest = new VocalidRequest(
        48000, // sample rate
        1,     // channels
        "1",   // rate of speech
        "1",   // pitch
        "path/to/voice.vocalidnet",
        "Text to synthesize.",
        audioFormat
);

// Pass the request built above; audio data is delivered to the callback.
vocalidSdk.local_synthesis(vocalidRequest, callback);

 // 1. Load the license (the engine is optional; calls are skipped when it is nil).
 let engine = sdk.getEngine()
 let licensePath = VOCDataStore.default.vocalidDirectory + voiceResult.licenseName
 engine?.loadLicense(licensePath)

 // 2. Load the voice from the vocalid directory.
 engine?.setVoicePath(VOCDataStore.default.vocalidDirectory)
 engine?.setVoice(voiceResult.fileName, loadNow: true)

 // 3. Synthesize locally.
 engine?.speak("This is my vocal identity")

Perform local synthesis using a downloaded voice. Audio data is returned via callbacks (see the Example Synthesis Callback section below).

Streaming Synthesis

// Describe the audio format to produce.
VocalidAudioFormat audioFormat = new VocalidAudioFormat(
        "pcm", // codec
        ""     // container
);
// Build the request; for streaming synthesis the fifth argument is the
// voice's download token.
VocalidRequest vocalidRequest = new VocalidRequest(
        48000, // sample rate
        1,     // channels
        "1",   // rate of speech
        "1",   // pitch
        downloadToken,
        "Text to synthesize.",
        audioFormat
);

// Pass the request built above; audio data is delivered to the callback.
vocalidSdk.stream_synthesis(bearerToken, vocalidRequest, callback);

 // Keep a strong reference to the player outside the callback: a player held
 // only by a local inside the closure is released when the closure returns,
 // which ends playback before anything is heard.
 var player: AVAudioPlayer?

 let vocalidRequest = VocalidRequest(pitch: 1, rate: 1, script: "This is my vocal identity")

 sdk.streamSynthesis(voiceInfo: voiceResult, vq: vocalidRequest) { (data, error) in
     // Report a failed request instead of ignoring `error`.
     if let failure = error {
         print(failure.localizedDescription)
         return
     }
     guard let audioData = data else {
         print("no data available")
         return
     }
     do {
         player = try AVAudioPlayer(data: audioData)
         player?.play()
     } catch {
         // Creating the player can fail (e.g. unreadable audio data); say why.
         print(error.localizedDescription)
     }
 }

Perform streaming synthesis using a download token and a bearer token. Audio data is returned via callbacks (see the Example Synthesis Callback section below).

Example Synthesis Callback

/**
 * Example synthesis callback that plays mono, 16-bit PCM audio through an
 * Android {@link AudioTrack} created in streaming mode.
 */
class SynthesisCallback extends Callback {
    /** Sample rate in Hz, used for both the buffer-size query and the track itself. */
    private static final int SAMPLE_RATE_HZ = 48000;

    /** Minimum buffer size reported by AudioTrack; also used as the write chunk size. */
    public int minBuffer;
    /** Streaming track that received audio data is written to. */
    public AudioTrack audioTrack;

    SynthesisCallback(){

        minBuffer = AudioTrack.getMinBufferSize(
                SAMPLE_RATE_HZ,
                AudioFormat.CHANNEL_OUT_MONO,
                AudioFormat.ENCODING_PCM_16BIT);

        audioTrack = new AudioTrack(
                AudioManager.STREAM_MUSIC,
                SAMPLE_RATE_HZ,
                AudioFormat.CHANNEL_OUT_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                minBuffer,
                AudioTrack.MODE_STREAM);
    }


    /**
     * Plays one buffer of audio data: starts the track, writes the data,
     * then stops the track.
     *
     * @param data audio bytes to play; null or empty data plays nothing
     */
    public void call(final byte[] data) {
        audioTrack.play();
        playAudio(data);
        audioTrack.stop();
    }

    /**
     * Writes {@code b} to the audio track in chunks of at most
     * {@code minBuffer} bytes.
     *
     * @param b audio bytes to write; null or empty input is ignored
     */
    void playAudio(byte[] b) {
        if (b == null || b.length == 0) {
            return;
        }
        final int maxBytesToCopy = minBuffer;
        int offset = 0;

        // write audio data to audioTrack
        while (offset < b.length) {
            final int bytesToWrite = Math.min(maxBytesToCopy, (b.length - offset));
            final int written = audioTrack.write(b, offset, bytesToWrite);
            if (written <= 0) {
                // Negative values are error codes and zero means no progress:
                // stop instead of silently skipping audio or looping forever.
                break;
            }
            // Advance by what was actually accepted, which may be less than requested.
            offset += written;
        }
    }
}