Introduction
Welcome to the VocaliD SDK docs!
Sdk
Initialize Sdk
// create a new sdk object; bearerToken is the OAuth2 bearer token required for initialization
VocalidSdk vocalidSdk = VocalidSdk.newInstance(context, bearerToken);
// create a new sdk object; `token` is the OAuth2 bearer token required for initialization
let sdk = VocalidSdk.newInstance(bearerToken: token)
A bearer token (OAuth2) is required to initialize the VocalidSdk. To get a bearer token, please follow the
API authentication docs.
Voices
Voice
// accessors available on a Voice object
Voice voice = new Voice();
voice.getCanStream();      // canStream: whether the voice can be used for synthesis via the API
voice.getCanDownload();    // canDownload: whether the voice can be downloaded via the API
voice.getDownloadToken();  // downloadToken: token used for downloading the voice, license file or generating synthesis
voice.getVoiceId();        // voiceId: the UUID of the voice
voice.getLicensed();       // licensed: whether the voice is licensed for local synthesis (depends on a valid license file being loaded)
voice.getFilename();       // filename: recommended filename when downloading the voice locally
voice.getModelname();      // modelname: type of voice model (Hi Res or Low Res)
voice.getName();           // name: name assigned to the voice
/// Information about a single voice, as used by the Swift SDK examples.
public struct VoiceInfo {
    /// Checked by the examples before a voice or license download is started.
    /// Presumably mirrors `canDownload` (voice can be downloaded via the API) — confirm.
    public var isDownloadable: Bool

    /// Presumably mirrors `canStream` (voice can be used for synthesis via the API) — confirm.
    public var isStreamable: Bool

    /// File name of the voice; the local-synthesis example passes it to `setVoice(_:loadNow:)`.
    public var fileName: String

    /// Token used for downloading the voice, license file or generating synthesis.
    public var downloadToken: String

    /// The UUID of the voice.
    public var voiceId: String

    /// File name of the license; the examples append it to the vocalid directory to build the license path.
    public var licenseName: String

    /// Expected to be `true` once the voice file has been downloaded successfully.
    public var isDownloaded: Bool

    /// Expected to be `true` once the license file has been downloaded successfully.
    public var isLicensed: Bool
}
Parameter | Description |
---|---|
canDownload | boolean indicating whether or not the voice can be downloaded via the API. |
canStream | boolean indicating whether or not the voice can be used for synthesis via the API. |
downloadToken | token used for downloading the voice, license file or generating synthesis. |
expiration | the time after which the voice will no longer be available or null if the voice will never expire. |
licensed | boolean indicating whether or not the voice is licensed for local synthesis. This value depends on a valid license file being loaded. |
voiceId | the UUID of the voice. |
filename | recommended filename when downloading voice locally. |
modelname | type of voice model (Hi Res or Low Res). |
name | name assigned to voice. |
Get Api Voice List
// query the api for the list of voices available for synthesis and download
ArrayList<Voice> voiceList = vocalidSdk.get_api_voices(bearerToken);
sdk.getApiVoices { fetchedInfos, fetchError in
    // A reported error ends the callback after logging its description.
    if let fetchError = fetchError {
        print(fetchError.localizedDescription)
        return
    }

    // No error but also no list: nothing was fetched.
    guard let voiceInfos = fetchedInfos else {
        print("no voice infos fetched")
        return
    }

    // use voiceInfos here
}
Query the API for the list of voices available for synthesis and download.
Download Voice
// download the voice to a file on disk at filePath
boolean success = vocalidSdk.download_voice_file(bearerToken, downloadToken, filePath);
// or download the voice into a byte array
byte[] voiceData = vocalidSdk.download_voice_data(bearerToken, downloadToken);
// Skip the download when the voice is not flagged as downloadable.
if voiceInfo.isDownloadable {
    sdk.downloadVoiceFile(voiceInfo: voiceInfo) { (success, error) in
        if success {
            print("success")
            // voiceInfo.isDownloaded should return true
        } else {
            // Unwrap the optional so the message itself is printed instead of "Optional(...)" or "nil".
            print(error?.localizedDescription ?? "unknown error")
        }
    }
}
Download voice files to disk or to a byte array using the voice's download token and the bearer token used during the API voice query.
File Info
Voice voice = vocalidsdk.get_voice_info(String path);
Query information about local voice files, such as the voice id, the expiration and whether the voice is licensed.
Licenses
Download License
// download the license for the voice to a file on disk at filePath
boolean success = vocalidSdk.download_license_file(bearerToken, downloadToken, filePath);
// or download the license into a byte array
// NOTE(review): download_voice_data takes no filePath; confirm whether filePath belongs in this call
byte[] licenseData = vocalidSdk.download_license_data(bearerToken, downloadToken, filePath);
// Skip the license download when the voice is not flagged as downloadable.
if voiceInfo.isDownloadable {
    sdk.downloadLicenseFile(voiceInfo: voiceInfo) { (success, error) in
        if success {
            print("success")
            // voiceInfo.isLicensed should return true
        } else {
            // Unwrap the optional so the message itself is printed instead of "Optional(...)" or "nil".
            print(error?.localizedDescription ?? "unknown error")
        }
    }
}
License files are required to access local synthesis. To download a license file for a voice, a bearer token and the download token of the voice are required.
Load License
// load a license from a file on disk
boolean success = vocalidSdk.load_license_file("/path/to/license.txt");
// or load a license from in-memory data (alternative to the call above)
boolean success = vocalidSdk.load_license_data(byteData);
// the engine is used through optional chaining below
let engine = sdk.getEngine()
// license path: the vocalid directory followed by the voice's license file name
let licensePath = VOCDataStore.default.vocalidDirectory + voiceInfo.licenseName
// the license must be loaded before performing local synthesis
engine?.loadLicense(licensePath)
License files must be loaded prior to performing local synthesis.
License File Info
// read information about a local license file located at path
DrmBlock license = vocalidSdk.get_license_info(path);
license.getExpired();           // presumably whether the license has expired — confirm
license.getExpirationString();  // presumably the expiration as a string — confirm
license.getVoiceId();           // presumably the id of the voice the license belongs to — confirm
Query information about local license files, such as the voice id and the expiration.
Synthesis
Local Synthesis
// audio format of the synthesized output
VocalidAudioFormat audioFormat = new VocalidAudioFormat(
    "pcm", // codec
    ""     // container
);
// request describing what to synthesize and with which local voice file
VocalidRequest vocalidRequest = new VocalidRequest(
    48000, // sample rate
    1,     // channels
    "1",   // rate of speech
    "1",   // pitch
    "path/to/voice.vocalidnet",
    "Text to synthesize.",
    audioFormat
);
// pass the request built above; audio data is returned via the callback
vocalidSdk.local_synthesis(vocalidRequest, callback);
// Local synthesis: the license and the voice are loaded before speaking.
//1. load license
let engine = sdk.getEngine()
let licensePath = VOCDataStore.default.vocalidDirectory + voiceResult.licenseName
engine?.loadLicense(licensePath)
//2. load voice
engine?.setVoicePath(VOCDataStore.default.vocalidDirectory)
engine?.setVoice(voiceResult.fileName, loadNow: true)
//3. synthesize locally
engine?.speak("This is my vocal identity")
Perform local synthesis using a downloaded voice. Audio data is returned via callbacks (see the Example Synthesis Callback section).
Streaming Synthesis
// audio format of the synthesized output
VocalidAudioFormat audioFormat = new VocalidAudioFormat(
    "pcm", // codec
    ""     // container
);
// request describing what to synthesize; the voice is identified by its download token
VocalidRequest vocalidRequest = new VocalidRequest(
    48000, // sample rate
    1,     // channels
    "1",   // rate of speech
    "1",   // pitch
    downloadToken,
    "Text to synthesize.",
    audioFormat
);
// pass the request built above; audio data is returned via the callback
vocalidSdk.stream_synthesis(bearerToken, vocalidRequest, callback);
// Keep the player outside the callback, ideally as a property of a long-lived object:
// an AVAudioPlayer referenced only by a constant inside the closure is released as soon
// as the closure returns, which ends playback before anything is heard.
var player: AVAudioPlayer?

let vocalidRequest = VocalidRequest(pitch: 1, rate: 1, script: "This is my vocal identity")
sdk.streamSynthesis(voiceInfo: voiceResult, vq: vocalidRequest) { (data, error) in
    // Report a failed request instead of silently dropping the error.
    if let err = error {
        print(err.localizedDescription)
        return
    }
    guard let d = data else {
        print("no data available")
        return
    }
    do {
        player = try AVAudioPlayer(data: d)
        player?.play()
    } catch let playbackError {
        // The player could not be created from the returned data.
        print(playbackError.localizedDescription)
    }
}
Perform streaming synthesis using a download token and a bearer token. Audio data is returned via callbacks (see the Example Synthesis Callback section).
Example Synthesis Callback
/**
 * Example synthesis callback that plays the audio it receives through an
 * AudioTrack configured for 48 kHz, mono, 16-bit PCM in streaming mode.
 */
class SynthesisCallback extends Callback {
    /** Sample rate shared by the buffer-size query and the track itself. */
    private static final int SAMPLE_RATE_HZ = 48000;

    /** Minimum AudioTrack buffer size in bytes; also the largest chunk written at once. */
    public int minBuffer;
    /** Streaming track the received audio is written to. */
    public AudioTrack audioTrack;

    SynthesisCallback() {
        minBuffer = AudioTrack.getMinBufferSize(
            SAMPLE_RATE_HZ,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_16BIT);
        audioTrack = new AudioTrack(
            AudioManager.STREAM_MUSIC,
            SAMPLE_RATE_HZ,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_16BIT,
            minBuffer,
            AudioTrack.MODE_STREAM);
    }

    /**
     * Plays one buffer of audio: starts the track, writes the data, then stops the track.
     *
     * @param data audio bytes to play
     */
    public void call(final byte[] data) {
        audioTrack.play();
        playAudio(data);
        audioTrack.stop();
    }

    /**
     * Writes the buffer to the track in chunks of at most {@code minBuffer} bytes.
     *
     * @param b audio bytes to write; an empty array is ignored
     */
    void playAudio(byte[] b) {
        if (b.length == 0) {
            return;
        }
        final int maxBytesToCopy = minBuffer;
        int offset = 0;
        // write audio data to audioTrack
        while (offset < b.length) {
            final int bytesToWrite = Math.min(maxBytesToCopy, (b.length - offset));
            final int written = audioTrack.write(b, offset, bytesToWrite);
            if (written <= 0) {
                // write() returned an error code or made no progress (for example because
                // minBuffer is an error value); stop instead of skipping data or looping forever.
                break;
            }
            // Advance by what was actually accepted; a short write is retried from there.
            offset += written;
        }
    }
}