# @superapp_men/speech-to-text

Real-time speech recognition for SuperApp Partner Apps. Convert speech to text with high accuracy, multi-language support, and real-time results.


## Features

✅ Real-time Transcription - Get partial results while speaking
✅ Multi-language Support - 20+ languages supported
✅ High Accuracy - Confidence scores for each result
✅ Alternative Transcriptions - Multiple possible interpretations
✅ Event-driven Architecture - React to speech events in real-time
✅ Rich Utilities - Clean, format, analyze, and store transcripts
✅ TypeScript - Full type safety and IntelliSense
✅ Cross-platform - Works on Web, iOS, and Android
✅ Zero Dependencies - Lightweight and efficient
## Installation

```bash
npm install @superapp_men/speech-to-text
```

or

```bash
yarn add @superapp_men/speech-to-text
```
## Quick Start

```typescript
import { SpeechToText, RecognitionState } from "@superapp_men/speech-to-text";
// Initialize with configuration
const speech = new SpeechToText({
timeout: 10000,
debug: true,
});
// Check availability
const available = await speech.isAvailable();
if (!available) {
  throw new Error("Speech recognition not available");
}
// Request permission
const permission = await speech.requestPermission();
if (permission !== "granted") {
  throw new Error("Microphone permission denied");
}
// Set up event listeners BEFORE starting
speech.on("stateChange", ({ state }) => {
console.log("State changed:", state);
if (state === RecognitionState.LISTENING) {
console.log("🎤 Listening...");
}
});
// Get final result when recognition completes (auto-stops when user stops talking)
speech.on("result", ({ result }) => {
console.log("Final:", result.transcript);
console.log("Confidence:", result.confidence);
});
// Handle errors
speech.on("error", ({ message }) => {
console.error("Error:", message);
});
// Start listening
await speech.startListening({
language: "ar-MA", // Arabic (Morocco)
partialResults: false, // Auto-stop when user stops talking
popup: false, // Partner app manages its own UI
});
// Recognition automatically stops when user finishes speaking
// The 'result' event will fire with the final transcript
```

## Table of Contents
- Basic Usage
- Configuration
- API Reference
- Events
- React Integration
- Vue Integration
- Supported Languages
- Error Handling
- Best Practices
## Basic Usage

```typescript
import { SpeechToText, RecognitionState } from "@superapp_men/speech-to-text";
async function recordSpeech() {
const speech = new SpeechToText({
timeout: 10000,
debug: true,
});
// Check if available
const available = await speech.isAvailable();
if (!available) {
console.log("Speech recognition not available");
return;
}
// Request permission
const permission = await speech.requestPermission();
if (permission !== "granted") {
console.log("Permission denied");
return;
}
// Set up event listeners
speech.on("result", ({ result }) => {
console.log("Final transcript:", result.transcript);
console.log("Confidence:", result.confidence);
});
speech.on("stateChange", ({ state }) => {
console.log("State:", state);
});
speech.on("error", ({ message }) => {
console.error("Error:", message);
});
// Start listening
await speech.startListening({
language: "ar-MA", // Arabic (Morocco)
partialResults: false, // Auto-stop when user stops talking
popup: false, // Partner app manages UI
});
// Recognition automatically stops when user finishes speaking
// The 'result' event will fire with the final transcript
// No need to call stopListening() - it's automatic!
}
```

### With Real-time Partial Results (React)

```typescript
import { useState, useEffect } from "react";
import {
SpeechToText,
RecognitionState,
Language,
} from "@superapp_men/speech-to-text";
function SpeechToTextComponent() {
const [speech] = useState(
() =>
new SpeechToText({
timeout: 10000,
debug: true,
})
);
  const [state, setState] = useState(RecognitionState.IDLE);
const [isListening, setIsListening] = useState(false);
const [transcript, setTranscript] = useState("");
const [partialTranscript, setPartialTranscript] = useState("");
  const [error, setError] = useState<string | null>(null);
useEffect(() => {
// State changes
const unsubState = speech.on("stateChange", ({ state }) => {
setState(state);
setIsListening(state === RecognitionState.LISTENING);
});
// Real-time partial results (if partialResults: true)
const unsubPartial = speech.on("partialResult", ({ result }) => {
setPartialTranscript(result.transcript);
});
// Final result with confidence
const unsubResult = speech.on("result", ({ result }) => {
setTranscript(result.transcript);
setPartialTranscript(""); // Clear partial when final arrives
});
// Error handling
const unsubError = speech.on("error", ({ message }) => {
setError(message);
});
// Listening events
const unsubStarted = speech.on("listeningStarted", () => {
console.log("🎤 Listening started");
});
const unsubStopped = speech.on("listeningStopped", ({ duration }) => {
console.log("⏹️ Listening stopped, duration:", duration);
});
return () => {
unsubState();
unsubPartial();
unsubResult();
unsubError();
unsubStarted();
unsubStopped();
speech.destroy();
};
}, [speech]);
const handleStartListening = async () => {
try {
setError(null);
setTranscript("");
setPartialTranscript("");
const permission = await speech.requestPermission();
if (permission !== "granted") {
setError("Microphone permission is required");
return;
}
await speech.startListening({
language: Language.EN_US,
partialResults: true, // Enable real-time updates
popup: false, // Partner app manages UI
});
} catch (err) {
setError(
err instanceof Error ? err.message : "Failed to start listening"
);
}
};
  return (
    <div>
      <button onClick={handleStartListening}>
        {isListening ? "🎤 Listening..." : "🎤 Start"}
      </button>
      {error && <p>{error}</p>}
      {partialTranscript && <p>{partialTranscript}...</p>}
      {transcript && <p>{transcript}</p>}
    </div>
  );
}
```

## Configuration
### SpeechToText Options

```typescript
interface SpeechToTextConfig {
timeout?: number; // Request timeout in ms (default: 5000)
debug?: boolean; // Enable debug logging (default: false)
}

const speech = new SpeechToText({
timeout: 10000,
debug: true,
});
```

### Recognition Options

```typescript
interface SpeechRecognitionConfig {
language?: string; // Language code (default: 'en-US')
maxAlternatives?: number; // Max alternative results (default: 1)
partialResults?: boolean; // Enable partial results (default: false)
popup?: boolean; // Show native UI popup (default: false)
timeout?: number; // Recognition timeout (default: 30000)
  metadata?: Record<string, unknown>; // Custom metadata
}

await speech.startListening({
language: Language.ES_ES,
maxAlternatives: 3,
partialResults: true, // true = real-time updates, false = auto-stop when user stops talking
popup: false, // false = partner app manages UI, true = show native popup
});
```

**Important notes:**

- `partialResults: false` (default): Recognition automatically stops when the user finishes speaking, and the `result` event fires with the final transcript. This is the recommended setting for most use cases.
- `partialResults: true`: Provides real-time partial results while speaking; you must call `stopListening()` manually to get the final result.
- `popup: false` (default): The partner app is responsible for its own UI. Recommended for embedded partner apps.
- `popup: true`: Shows the native platform UI for recognition. Use it when you want a platform-native experience.
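In practice the two modes lead to different call patterns. A minimal sketch contrasting them, using only the options and events documented above (both modes are shown together for comparison; use one per session):

```typescript
// Mode 1 (default): auto-stop. Listen for the final result only.
speech.on("result", ({ result }) => {
  console.log("Final:", result.transcript);
});
await speech.startListening({ partialResults: false });

// Mode 2: real-time. Stream partials, then stop manually.
speech.on("partialResult", ({ result }) => {
  console.log("Partial:", result.transcript);
});
await speech.startListening({ partialResults: true });
// ...later, e.g. when the user taps a stop button:
const finalResult = await speech.stopListening();
console.log("Final:", finalResult?.transcript);
```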
## API Reference

### SpeechToText
#### Constructor
```typescript
new SpeechToText(config?: SpeechToTextConfig)
```

#### Methods

##### `isAvailable(): Promise<boolean>`

Check if speech recognition is available on the device.

```typescript
const available = await speech.isAvailable();
if (!available) {
console.log("Speech recognition not supported");
}
```

##### `getSupportedLanguages(): Promise<string[]>`

Get the list of supported language codes.

```typescript
const languages = await speech.getSupportedLanguages();
console.log("Supported:", languages);
// ['en-US', 'es-ES', 'fr-FR', ...]
```

##### `checkPermission(): Promise<'granted' | 'denied' | 'prompt' | 'unknown'>`

Check the current microphone permission status.

```typescript
const status = await speech.checkPermission();
if (status === "prompt") {
await speech.requestPermission();
}
```

##### `requestPermission(): Promise<'granted' | 'denied' | 'prompt' | 'unknown'>`

Request microphone permission from the user.

```typescript
const permission = await speech.requestPermission();
if (permission !== "granted") {
alert("Microphone permission is required");
}
```

##### `startListening(config?: SpeechRecognitionConfig): Promise<void>`

Start listening for speech. Set up event listeners before calling this method.

```typescript
// Set up listeners first
speech.on("result", ({ result }) => {
console.log("Final:", result.transcript);
});

speech.on("stateChange", ({ state }) => {
console.log("State:", state);
});
// Then start listening
await speech.startListening({
language: Language.EN_US,
partialResults: false, // Auto-stop when user finishes speaking
popup: false, // Partner app manages UI
});
```

**Behavior:**

- With `partialResults: false` (default): recognition automatically stops when the user stops talking, and the `result` event fires with the final transcript.
- With `partialResults: true`: real-time `partialResult` events are emitted; call `stopListening()` manually to get the final result.

##### `stopListening(): Promise<RecognitionResult | null>`

Stop listening and return the final result. Note: this is only needed when `partialResults: true`; with `partialResults: false`, recognition auto-stops when the user finishes speaking.

Returns:

```typescript
interface RecognitionResult {
transcript: string;
confidence: number;
isFinal: boolean;
alternatives?: TranscriptAlternative[];
timestamp: number;
}
```

```typescript
// Only needed when partialResults: true
if (speech.isListening()) {
const result = await speech.stopListening();
if (result) {
console.log("Transcript:", result.transcript);
console.log("Confidence:", (result.confidence * 100).toFixed(0) + "%");
}
}
```

**When to use:**

- `partialResults: false` (default): don't call `stopListening()`; recognition auto-stops when the user finishes speaking. Listen for the `result` event instead.
- `partialResults: true`: call `stopListening()` when you want to stop manually and get the final result.

##### `getStatus()`

Get the current recognition status (returns a promise with `isListening`, `state`, and `permissionStatus`).

```typescript
const status = await speech.getStatus();
console.log("Is listening:", status.isListening);
console.log("State:", status.state);
console.log("Permission:", status.permissionStatus);
```

##### `getState(): RecognitionState`

Get the current recognition state.

Returns: `'idle' | 'starting' | 'listening' | 'processing' | 'stopped' | 'error'`

```typescript
const state = speech.getState();
```

##### `isListening(): boolean`

Check if currently listening.

```typescript
if (speech.isListening()) {
await speech.stopListening();
}
```

##### `getCurrentTranscript(): string`

Get the current transcript (partial or final).

```typescript
const transcript = speech.getCurrentTranscript();
```

##### `getDuration(): number`

Get the duration of the current session in milliseconds.

```typescript
const duration = speech.getDuration();
console.log("Recording for:", duration, "ms");
```

##### `on(event, callback)`

Add an event listener. Returns an unsubscribe function.

```typescript
const unsubscribe = speech.on("result", ({ result }) => {
console.log(result.transcript);
});

// Later, unsubscribe
unsubscribe();
```

##### `off(event, callback)`

Remove an event listener.

```typescript
speech.off("result", myCallback);
```

##### `removeAllListeners(event?: SpeechEventType): void`

Remove all listeners for an event, or for all events if none is specified.

```typescript
speech.removeAllListeners("result"); // Remove all 'result' listeners
speech.removeAllListeners(); // Remove all listeners
```

##### `setDebug(enabled: boolean): void`

Enable or disable debug logging.

```typescript
speech.setDebug(true);
```

##### `destroy(): void`

Clean up and destroy the instance.

```typescript
speech.destroy();
```

## Events

### Event Types

```typescript
type SpeechEventType =
| "stateChange" // Recognition state changed
| "listeningStarted" // Started listening
| "listeningStopped" // Stopped listening
| "partialResult" // Partial result while speaking
| "result" // Final result
| "error" // Error occurred
| "soundStart" // Sound detected
| "soundEnd" // Sound ended
| "speechStart" // Speech detected
| "speechEnd"; // Speech ended
```

### stateChange

Fired when the recognition state changes.

```typescript
speech.on("stateChange", ({ state, previousState }) => {
  console.log(`${previousState} → ${state}`);

  // Update UI based on state
if (state === "listening") {
button.textContent = "🛑 Stop";
} else if (state === "idle") {
button.textContent = "🎤 Start";
}
});
```

### listeningStarted

Fired when listening starts.

```typescript
speech.on("listeningStarted", ({ sessionId, config }) => {
console.log("Session started:", sessionId);
console.log("Language:", config.language);
});
```

### listeningStopped

Fired when listening stops.

```typescript
speech.on("listeningStopped", ({ sessionId, duration }) => {
console.log("Session ended:", sessionId);
console.log("Duration:", duration, "ms");
});
```

### partialResult

Fired continuously while the user is speaking (real-time updates).

```typescript
speech.on("partialResult", ({ result }) => {
// Update UI in real-time
  document.getElementById("transcript").textContent = result.transcript;
  console.log("Partial:", result.transcript);
console.log("Is final:", result.isFinal);
});
```

### result

Fired when the final result is available.

```typescript
speech.on("result", ({ result }) => {
console.log("Transcript:", result.transcript);
console.log("Confidence:", result.confidence); // Show alternatives
result.alternatives?.forEach((alt, i) => {
    console.log(`Alt ${i + 1}:`, alt.transcript, `(${alt.confidence})`);
});
});
```

### error

Fired when an error occurs.

```typescript
speech.on("error", ({ code, message, details }) => {
  console.error(`Error [${code}]: ${message}`);

  switch (code) {
case "PERMISSION_DENIED":
alert("Microphone access is required");
break;
case "NO_SPEECH":
alert("No speech detected");
break;
case "TIMEOUT":
alert("Recognition timeout");
break;
}
});
```

### soundStart / soundEnd

Fired when sound is detected or ends.

```typescript
speech.on("soundStart", () => {
console.log("🔊 Sound detected");
// Show visual indicator
});speech.on("soundEnd", () => {
console.log("🔇 Sound ended");
// Hide visual indicator
});
```

### speechStart / speechEnd

Fired when speech is detected or ends.

```typescript
speech.on("speechStart", () => {
console.log("🗣️ Speech started");
});speech.on("speechEnd", () => {
console.log("🤐 Speech ended");
});
```

## React Integration

### Custom Hook

```typescript
import { useState, useEffect } from "react";
import { SpeechToText, RecognitionState } from "@superapp_men/speech-to-text";

function useSpeechToText() {
const [speech] = useState(() => new SpeechToText());
const [isListening, setIsListening] = useState(false);
const [transcript, setTranscript] = useState("");
const [state, setState] = useState(RecognitionState.IDLE);
useEffect(() => {
const unsubscribers = [
speech.on("stateChange", ({ state }) => {
setState(state);
setIsListening(state === RecognitionState.LISTENING);
}),
speech.on("partialResult", ({ result }) => {
setTranscript(result.transcript);
}),
speech.on("result", ({ result }) => {
setTranscript(result.transcript);
}),
];
return () => {
unsubscribers.forEach((unsub) => unsub());
speech.destroy();
};
}, [speech]);
return { speech, isListening, transcript, state };
}
```

### Complete Component

```typescript
import React, { useState, useEffect } from "react";
import {
SpeechToText,
RecognitionState,
Language,
} from "@superapp_men/speech-to-text";function VoiceRecorder() {
const [speech] = useState(
() =>
new SpeechToText({
timeout: 10000,
debug: true,
})
);
const [state, setState] = useState(RecognitionState.IDLE);
const [isListening, setIsListening] = useState(false);
const [transcript, setTranscript] = useState("");
const [partialTranscript, setPartialTranscript] = useState("");
  const [error, setError] = useState<string | null>(null);
const [permission, setPermission] = useState("unknown");
useEffect(() => {
// Set up all event listeners
const unsubState = speech.on("stateChange", ({ state }) => {
setState(state);
setIsListening(state === RecognitionState.LISTENING);
});
const unsubPartial = speech.on("partialResult", ({ result }) => {
setPartialTranscript(result.transcript);
});
const unsubResult = speech.on("result", ({ result }) => {
setTranscript(result.transcript);
setPartialTranscript("");
});
const unsubError = speech.on("error", ({ message }) => {
setError(message);
});
return () => {
unsubState();
unsubPartial();
unsubResult();
unsubError();
speech.destroy();
};
}, [speech]);
const handleStartListening = async () => {
try {
setError(null);
setTranscript("");
setPartialTranscript("");
if (permission !== "granted") {
const status = await speech.requestPermission();
setPermission(status);
if (status !== "granted") {
setError("Microphone permission is required");
return;
}
}
await speech.startListening({
language: Language.EN_US,
partialResults: true, // Enable real-time partial results
popup: false, // Partner app manages its own UI
});
} catch (err: any) {
setError(err.message);
}
};
  return (
    <div>
      <button onClick={handleStartListening} disabled={isListening}>
        {isListening ? "🎤 Listening..." : "🎤 Start Listening"}
      </button>

      {error && <p className="error">{error}</p>}

      <h3>Transcript:</h3>
      {partialTranscript && <p>{partialTranscript}...</p>}
      {transcript && <p>{transcript}</p>}
      {!partialTranscript && !transcript && <p>Say something...</p>}
    </div>
  );
}
```

## Vue Integration
### Composable

```typescript
import { ref, onMounted, onUnmounted } from "vue";
import { SpeechToText, RecognitionState } from "@superapp_men/speech-to-text";export function useSpeechToText() {
const speech = new SpeechToText();
const isListening = ref(false);
const transcript = ref("");
const state = ref(RecognitionState.IDLE);
  const error = ref<string | null>(null);
onMounted(() => {
speech.on("stateChange", ({ state: newState }) => {
state.value = newState;
isListening.value = newState === RecognitionState.LISTENING;
});
speech.on("partialResult", ({ result }) => {
transcript.value = result.transcript;
});
speech.on("result", ({ result }) => {
transcript.value = result.transcript;
});
speech.on("error", ({ message }) => {
error.value = message;
});
});
onUnmounted(() => {
speech.destroy();
});
const startListening = async (config?: any) => {
error.value = null;
try {
const permission = await speech.requestPermission();
if (permission !== "granted") {
error.value = "Permission denied";
return;
}
await speech.startListening(config);
} catch (err: any) {
error.value = err.message;
}
};
const stopListening = async () => {
try {
await speech.stopListening();
} catch (err: any) {
error.value = err.message;
}
};
return {
isListening,
transcript,
state,
error,
startListening,
stopListening,
speech,
};
}
```

### Component

```vue
<script setup lang="ts">
// Uses the composable defined above (assumed to live in ./useSpeechToText)
import { useSpeechToText } from "./useSpeechToText";

const { isListening, transcript, error, startListening } = useSpeechToText();
</script>

<template>
  <button @click="startListening()">
    {{ isListening ? "🎤 Listening..." : "🎤 Start" }}
  </button>
  <p v-if="error">{{ error }}</p>
  <h3>Transcript:</h3>
  <p>{{ transcript || "Say something..." }}</p>
</template>
```

## Supported Languages
### Language Codes

```typescript
enum Language {
EN_US = "en-US", // English (US)
ES_ES = "es-ES", // Spanish (Spain)
FR_FR = "fr-FR", // French
AR_SA = "ar-SA", // Arabic (Standard)
AR_MA = "ar-MA", // Arabic (Morocco)
}
```

Note: Actual language availability depends on the device and platform. Use `getSupportedLanguages()` to check what's available.

## Error Handling
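For example, you can fall back to a default when a preferred language is unavailable. A minimal sketch using the methods above:

```typescript
// Prefer Moroccan Arabic, fall back to US English if unavailable
const supported = await speech.getSupportedLanguages();
const language = supported.includes("ar-MA") ? "ar-MA" : "en-US";

await speech.startListening({ language });
```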
### Error Codes

```typescript
enum SpeechRecognitionError {
PERMISSION_DENIED = "PERMISSION_DENIED",
NOT_SUPPORTED = "NOT_SUPPORTED",
ALREADY_LISTENING = "ALREADY_LISTENING",
NOT_LISTENING = "NOT_LISTENING",
RECOGNITION_FAILED = "RECOGNITION_FAILED",
NO_SPEECH = "NO_SPEECH",
ABORTED = "ABORTED",
AUDIO_CAPTURE = "AUDIO_CAPTURE",
NETWORK = "NETWORK",
TIMEOUT = "TIMEOUT",
SUPERAPP_NOT_AVAILABLE = "SUPERAPP_NOT_AVAILABLE",
LANGUAGE_NOT_SUPPORTED = "LANGUAGE_NOT_SUPPORTED",
}
```

### Handling Errors

```typescript
import {
SpeechToText,
SpeechRecognitionError,
} from "@superapp_men/speech-to-text";const speech = new SpeechToText();
// Try-catch for methods
try {
await speech.startListening();
} catch (error: any) {
switch (error.code) {
case SpeechRecognitionError.PERMISSION_DENIED:
alert("Please allow microphone access");
break;
case SpeechRecognitionError.NOT_SUPPORTED:
alert("Speech recognition not supported on this device");
break;
case SpeechRecognitionError.ALREADY_LISTENING:
console.warn("Already listening");
break;
default:
console.error("Error:", error.message);
}
}
// Event listener for errors
speech.on("error", ({ code, message, details }) => {
  console.error(`Error [${code}]: ${message}`);

  if (code === SpeechRecognitionError.NO_SPEECH) {
console.log("No speech detected, trying again...");
setTimeout(() => speech.startListening(), 1000);
}
});
```

## Best Practices

### Check availability first

```typescript
const available = await speech.isAvailable();
if (!available) {
// Show alternative input method
showTextInput();
return;
}
```

### Request permission gracefully

```typescript
const permission = await speech.requestPermission();
if (permission !== "granted") {
showPermissionExplanation();
return;
}
```

### Throttle partial result updates

```typescript
let lastUpdate = 0;

speech.on("partialResult", ({ result }) => {
// Throttle updates to avoid overwhelming UI
const now = Date.now();
if (now - lastUpdate > 100) {
updateUI(result.transcript);
lastUpdate = now;
}
});
```

### Give visual feedback

```typescript
speech.on("stateChange", ({ state }) => {
switch (state) {
case "listening":
micButton.classList.add("recording");
break;
case "processing":
micButton.classList.add("processing");
break;
default:
micButton.classList.remove("recording", "processing");
}
});speech.on("speechStart", () => {
visualizer.start(); // Show audio waveform
});
speech.on("speechEnd", () => {
visualizer.stop();
});
```

### Limit session duration

```typescript
await speech.startListening({
language: "en-US",
timeout: 30000, // 30 seconds max
});

// Or manually stop after a fixed time
setTimeout(async () => {
if (speech.isListening()) {
await speech.stopListening();
}
}, 30000);
```

### Clean up resources

```typescript
// In React
useEffect(() => {
  const speech = new SpeechToText();
  // ... use speech
return () => {
speech.destroy();
};
}, []);
// In Vue
onUnmounted(() => {
speech.destroy();
});
// Vanilla JS
window.addEventListener("beforeunload", () => {
speech.destroy();
});
```

### Stop when the app goes to background

```typescript
document.addEventListener("visibilitychange", async () => {
if (document.hidden && speech.isListening()) {
// App went to background, stop listening
await speech.stopListening();
}
});
```

### Provide a fallback input

```typescript
async function getInput() {
  try {
    // Try speech recognition; register listeners before starting
    return await new Promise<string>((resolve, reject) => {
      speech.on("result", ({ result }) => resolve(result.transcript));
      speech.on("error", ({ message }) => reject(new Error(message)));
      speech.startListening().catch(reject);
    });
  } catch (error) {
    // Fall back to text input
    return prompt("Please type your input:");
  }
}
```

## Browser Support
- Chrome/Edge 80+
- Firefox 75+ (limited support)
- Safari 14+
- iOS Safari 14+ (via Capacitor)
- Android WebView (via Capacitor)
Note: Speech recognition quality and feature availability vary by platform. iOS and Android provide the best experience via Capacitor.
## Troubleshooting

### "Speech recognition not available"
Cause: The device doesn't support speech recognition.
Solution: Check with `isAvailable()` and provide an alternative input method.

### "Permission denied"
Cause: The user denied microphone access.
Solution: Explain why permission is needed, then ask again, as in the sketch below.
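A minimal sketch of that flow (`showMicRationale` and `showTextInput` are hypothetical UI helpers):

```typescript
const status = await speech.checkPermission();
if (status !== "granted") {
  await showMicRationale(); // hypothetical: explain why the mic is needed
  const permission = await speech.requestPermission();
  if (permission !== "granted") {
    showTextInput(); // hypothetical: fall back to typed input
  }
}
```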
### No speech detected
Cause: No speech was detected within the timeout period.
Solution: Increase the timeout or add visual feedback, as in the sketch below.
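For example, a sketch that raises the recognition timeout (values are illustrative):

```typescript
// Give the user more time before NO_SPEECH / TIMEOUT fires
await speech.startListening({
  language: "en-US",
  timeout: 60000, // 60 s instead of the 30 s default
});
```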
### Partial results not working
Cause: The platform doesn't support partial results.
Solution: Set `partialResults: false` and rely on final results only.

### Poor accuracy
Solution:
- Check microphone quality
- Reduce background noise
- Use correct language setting
- Speak clearly and at a moderate pace
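Beyond these tips, confidence scores let you detect poor results at runtime. A sketch using the `result` event documented above (`acceptTranscript` and the 0.6 threshold are illustrative):

```typescript
speech.on("result", ({ result }) => {
  // Ask the user to repeat when confidence is low
  if (result.confidence < 0.6) {
    console.log("Low confidence, please repeat:", result.transcript);
    return;
  }
  acceptTranscript(result.transcript); // hypothetical app callback
});
```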
## Performance Tips

1. Reuse instances - Don't create new `SpeechToText` instances frequently; create one and reuse it across sessions (see the sketch below).
## Security & Privacy

- All speech processing happens on-device (iOS/Android) or via browser APIs (Web)
- No audio is sent to third-party servers by this library
- Partner Apps cannot access microphone directly
- SuperApp controls all permissions
- Use HTTPS in production
## License

MIT
## Support

- Email: h.afifi@alexsys.solutions
---
Ready to add speech recognition to your app? Install the package and start transcribing! 🎤📝