/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.example.android.ttsengine;

import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechService;
import android.util.Log;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.Map;

/**
 * A text-to-speech engine that generates "speech" that a robot might understand.
 * The engine supports two different "languages", each with its own frequency
 * mappings.
 *
 * It exercises all aspects of the text-to-speech engine API
 * {@link android.speech.tts.TextToSpeechService}.
 */
public class RobotSpeakTtsService extends TextToSpeechService {
private static final String TAG = "ExampleTtsService";
    /*
     * This is the sampling rate of our output audio. This engine outputs
     * audio at 16 kHz, 16 bits per sample, mono PCM.
     */
private static final int SAMPLING_RATE_HZ = 16000;
    /*
     * This buffer holds exactly one second of audio: we multiply by two since
     * each sample contains 16 bits (2 bytes), giving 16000 * 2 = 32000 bytes.
     */
    private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2];
private Map<Character, Integer> mFrequenciesMap;
private volatile String[] mCurrentLanguage = null;
private volatile boolean mStopRequested = false;
private SharedPreferences mSharedPrefs = null;
@Override
public void onCreate() {
super.onCreate();
mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME,
Context.MODE_PRIVATE);
        // We load the default language when we start up. This isn't strictly
        // required, though; it can always be loaded lazily on the first call to
        // onLoadLanguage or onSynthesizeText. This is a tradeoff between memory
        // usage and the latency of the first call.
onLoadLanguage("eng", "usa", "");
}
@Override
public void onDestroy() {
super.onDestroy();
}
@Override
protected String[] onGetLanguage() {
// Note that mCurrentLanguage is volatile because this can be called from
// multiple threads.
return mCurrentLanguage;
}
@Override
protected int onIsLanguageAvailable(String lang, String country, String variant) {
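        // Note: the TTS framework passes three-letter codes here: an ISO 639-2
        // language (e.g. "eng") and an ISO 3166 alpha-3 country (e.g. "USA"),
        // not the two-letter Locale forms.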
        // The robot speak synthesizer supports only English.
        if ("eng".equals(lang)) {
            // We support two specific robot languages, the British robot
            // language and the American robot language.
            if ("USA".equals(country) || "GBR".equals(country)) {
                // If the engine supported a specific variant, we would have
                // something like:
//
// if ("android".equals(variant)) {
// return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
// }
return TextToSpeech.LANG_COUNTRY_AVAILABLE;
}
// We support the language, but not the country.
return TextToSpeech.LANG_AVAILABLE;
}
return TextToSpeech.LANG_NOT_SUPPORTED;
}
    /*
     * Note that this method is synchronized, as is onSynthesizeText, because
     * onLoadLanguage can be called from multiple threads (while onSynthesizeText
     * is always called from a single thread only).
     */
@Override
protected synchronized int onLoadLanguage(String lang, String country, String variant) {
final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant);
if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) {
return isLanguageAvailable;
}
String loadCountry = country;
if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) {
loadCountry = "USA";
}
        // If we've already loaded the requested language, we can return early.
        // Note that we compare against loadCountry, which is what we store in
        // mCurrentLanguage below (an unsupported country maps to "USA").
        if (mCurrentLanguage != null) {
            if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(loadCountry)) {
                return isLanguageAvailable;
            }
        }
Map<Character, Integer> newFrequenciesMap = null;
try {
InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq");
newFrequenciesMap = buildFrequencyMap(file);
file.close();
} catch (IOException e) {
            Log.e(TAG, "Error loading data for: " + lang + "-" + country);
}
mFrequenciesMap = newFrequenciesMap;
mCurrentLanguage = new String[] { lang, loadCountry, ""};
return isLanguageAvailable;
}
@Override
protected void onStop() {
mStopRequested = true;
}
    @Override
    protected synchronized void onSynthesizeText(SynthesisRequest request,
            SynthesisCallback callback) {
        // Reset the stop flag: it may still be set from a previous request
        // that was interrupted by a call to onStop.
        mStopRequested = false;
        // Note that we call onLoadLanguage here since there is no guarantee
        // that there would have been a prior call to this function.
int load = onLoadLanguage(request.getLanguage(), request.getCountry(),
request.getVariant());
// We might get requests for a language we don't support - in which case
// we error out early before wasting too much time.
if (load == TextToSpeech.LANG_NOT_SUPPORTED) {
callback.error();
return;
}
// At this point, we have loaded the language we need for synthesis and
// it is guaranteed that we support it so we proceed with synthesis.
        // We denote that we are ready to start sending audio across to the
        // framework. We use a fixed sampling rate (16 kHz), and send data across
        // in 16-bit mono PCM.
        callback.start(SAMPLING_RATE_HZ,
                AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */);
// We then scan through each character of the request string and
// generate audio for it.
final String text = request.getText().toLowerCase();
for (int i = 0; i < text.length(); ++i) {
char value = normalize(text.charAt(i));
// It is crucial to call either of callback.error() or callback.done() to ensure
// that audio / other resources are released as soon as possible.
if (!generateOneSecondOfAudio(value, callback)) {
callback.error();
return;
}
}
// Alright, we're done with our synthesis - yay!
callback.done();
}
    /*
     * Normalizes a given character to the range 'a' - 'z' (inclusive), leaving
     * spaces untouched. Our frequency mappings contain frequencies for each of
     * these characters.
     */
private static char normalize(char input) {
if (input == ' ') {
return input;
}
if (input < 'a') {
return 'a';
}
if (input > 'z') {
return 'z';
}
return input;
}
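    /*
     * Parses a frequency-mapping asset. Each line is expected to be of the form
     * "<character>:<frequency in Hz>", e.g. "a:320" (an illustrative value; the
     * actual mappings live in the .freq files shipped in this sample's assets).
     */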
private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException {
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line = null;
Map<Character, Integer> map = new HashMap<Character, Integer>();
try {
while ((line = br.readLine()) != null) {
String[] parts = line.split(":");
if (parts.length != 2) {
throw new IOException("Invalid line encountered: " + line);
}
map.put(parts[0].charAt(0), Integer.parseInt(parts[1]));
}
map.put(' ', 0);
return map;
} finally {
is.close();
}
}
private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) {
ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN);
// Someone called onStop, end the current synthesis and return.
// The mStopRequested variable will be reset at the beginning of the
// next synthesis.
//
        // In general, a call to onStop() should make a best-effort attempt
// to stop all processing for the *current* onSynthesizeText request (if
// one is active).
if (mStopRequested) {
return false;
}
if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) {
return false;
}
final int frequency = mFrequenciesMap.get(alphabet);
if (frequency > 0) {
            // This is the wavelength in samples. The frequencies are chosen so
            // that waveLength is always even and divides SAMPLING_RATE_HZ
            // exactly.
final int waveLength = SAMPLING_RATE_HZ / frequency;
final int times = SAMPLING_RATE_HZ / waveLength;
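            // Worked example (hypothetical mapping): at 400 Hz,
            // waveLength = 16000 / 400 = 40 samples per cycle and
            // times = 16000 / 40 = 400 cycles, i.e. exactly one second of audio.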
for (int j = 0; j < times; ++j) {
                // For a square wave, half of the samples in each cycle are at
                // -amplitude and the other half at +amplitude (see getAmplitude()).
for (int i = 0; i < waveLength / 2; ++i) {
buffer.putShort((short)(getAmplitude() * -1));
}
for (int i = 0; i < waveLength / 2; ++i) {
buffer.putShort(getAmplitude());
}
}
} else {
// Play a second of silence.
for (int i = 0; i < mAudioBuffer.length / 2; ++i) {
buffer.putShort((short) 0);
}
}
// Get the maximum allowed size of data we can send across in audioAvailable.
final int maxBufferSize = cb.getMaxBufferSize();
int offset = 0;
while (offset < mAudioBuffer.length) {
int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset);
cb.audioAvailable(mAudioBuffer, offset, bytesToWrite);
offset += bytesToWrite;
}
return true;
}
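    /*
     * Returns the peak amplitude for generated samples. A lower amplitude is
     * used when the user has enabled the "whisper" preference.
     */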
private short getAmplitude() {
boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false);
return (short) (whisper ? 2048 : 8192);
}
}
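
/*
 * Usage sketch (client side, not part of this service): an application could
 * select this engine explicitly by package name. This is an illustrative,
 * untested snippet; the three-argument TextToSpeech constructor (API 14+)
 * picks which installed engine to use.
 *
 *   private TextToSpeech mTts;
 *   ...
 *   mTts = new TextToSpeech(context, new TextToSpeech.OnInitListener() {
 *       @Override
 *       public void onInit(int status) {
 *           if (status == TextToSpeech.SUCCESS) {
 *               mTts.setLanguage(Locale.US);
 *               mTts.speak("hello robot", TextToSpeech.QUEUE_FLUSH, null);
 *           }
 *       }
 *   }, "com.example.android.ttsengine");
 */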