unit uMain;
{$mode objfpc}{$H+}
// NOTE - to use SAPI:
// 1. Install the LazActiveX package and add it to the project's required packages
// 2. Generate SpeechLib_5_4_TLB (the header unit for the DLL) via Tools -> Import Type Library ...:
// - browse for the SAPI DLL
// - select "Create visual component" (this generates the TAxc* wrapper classes)
// - click OK and save the TLB file in the project directory
// 3. Install the EN-US (or another) language pack with Speech Recognition in Windows Settings
//
// typical path to SAPI dll files:
// C:\Windows\SysWOW64\Speech\Common\sapi.dll - 32bit
// C:\Windows\System32\Speech\Common\sapi.dll - 64bit
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls, LCLType, SpeechLib_5_4_TLB;
type
// Main form of the application. Hosts a memo used as an event log and the
// SAPI objects needed for in-process speech recognition.
TfrmMain = class(TForm)
  // Memo that receives one line of text per speech recognition event.
  memLog: TMemo;
  // Form creation handler; performs the SAPI setup.
  procedure FormCreate(Sender: TObject);
private
  // ActiveX wrapper around the in-process recognition context.
  FSpInProcRecoContext: TAxcSpInProcRecoContext;
  // Grammar object created from the recognition context.
  FRecoGrammar: ISpeechRecoGrammar;

  // Handlers for the recognition context events. Each one writes a line to memLog.
  procedure FOnSoundStart(
    Sender: TObject;
    StreamNumber: Integer;
    StreamPosition: OleVariant);
  procedure FOnSoundEnd(
    Sender: TObject;
    StreamNumber: Integer;
    StreamPosition: OleVariant);
  procedure FOnHypothesis(
    Sender: TObject;
    StreamNumber: Integer;
    StreamPosition: OleVariant;
    Result: ISpeechRecoResult);
  procedure FOnRecognition(
    Sender: TObject;
    StreamNumber: Integer;
    StreamPosition: OleVariant;
    RecognitionType: SpeechRecognitionType;
    Result: ISpeechRecoResult);
  procedure FOnFalseRecognition(
    Sender: TObject;
    StreamNumber: Integer;
    StreamPosition: OleVariant;
    Result: ISpeechRecoResult);
public
end;
var
// Global reference to the main form. Nothing in this unit assigns it;
// presumably it is set by Application.CreateForm in the project file - TODO confirm.
frmMain: TfrmMain;
implementation
{$R *.lfm}
// Sets up SAPI speech recognition when the form is created.
//
// Steps: select the default audio input token, create an in-process
// recognition context bound to that input, attach the event handlers, then
// load the command grammar and activate its top-level rules.
//
// Failures are not raised to the caller; they are reported in a message box.
// When the failure is an Exception, its message is appended to the fixed text
// so that the underlying cause is visible.
//
// Sender: the object that fired the event (not used).
procedure TfrmMain.FormCreate(Sender: TObject);
const
  // NOTE: instead of defining SAPI_AudioInId here, a constant from the SAPI DLL
  // should be used, but it is not imported into the TLB
  SAPI_AudioInId = 'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\AudioInput';
  GrammarFileName = 'grammar.xml';
  GrammarErrorMsg = 'Loading grammar file failed. Check if file exists and contains no errors.';
  InitErrorMsg = 'Cannot initialize SAPI. Check if Speech Recognition is installed in Windows.';
var
  recoContext: ISpeechRecoContext;
  category: TAxcSpObjectTokenCategory;
  audioToken: TAxcSpObjectToken;
  grammarPath: string;

  // Shows Msg in an error message box; a non-empty Detail is appended on a new line.
  procedure ShowError(const Msg, Detail: string);
  var
    fullText: string;
  begin
    fullText := Msg;
    if Detail <> '' then
      fullText := fullText + LineEnding + Detail;
    Application.MessageBox(PChar(fullText), 'Error', MB_ICONERROR);
  end;

begin
  try
    // microphone audio input setup
    category := TAxcSpObjectTokenCategory.Create(Self);
    category.OleServer.SetId(SAPI_AudioInId, False);
    category.Visible := False; // keeps the TAxc object from showing up on the form
    audioToken := TAxcSpObjectToken.Create(Self);
    audioToken.OleServer.SetId(category.OleServer.Default, '', False);
    audioToken.Visible := False;

    // create the recognition context and bind the audio input to it
    FSpInProcRecoContext := TAxcSpInProcRecoContext.Create(Self);
    FSpInProcRecoContext.Visible := False;
    recoContext := FSpInProcRecoContext.OleServer;
    recoContext.EventInterests := SREAllEvents;
    recoContext.Recognizer.AudioInput := audioToken.OleServer;

    // event handling
    FSpInProcRecoContext.OnSoundStart := @FOnSoundStart; // start of sound detected
    FSpInProcRecoContext.OnSoundEnd := @FOnSoundEnd; // end of sound detected
    FSpInProcRecoContext.OnHypothesis := @FOnHypothesis; // recognition hypothesis
    FSpInProcRecoContext.OnRecognition := @FOnRecognition; // successful recognition
    FSpInProcRecoContext.OnFalseRecognition := @FOnFalseRecognition; // false recognition (not enough confidence)

    // loading grammar file
    FRecoGrammar := recoContext.CreateGrammar(0);

    // Keep the original relative name whenever the file is found that way.
    // Otherwise fall back to the executable's directory, so the program also
    // works when it is started with a different working directory.
    grammarPath := GrammarFileName;
    if not FileExists(grammarPath) then
      grammarPath := ExtractFilePath(ParamStr(0)) + GrammarFileName;

    try
      FRecoGrammar.CmdLoadFromFile(grammarPath, SLOStatic);
      FRecoGrammar.CmdSetRuleState('', SGDSActive); // set all top-level rules active
    except
      on E: Exception do
        ShowError(GrammarErrorMsg, E.Message);
      else
        // raised object is not an Exception: no detail text available
        ShowError(GrammarErrorMsg, '');
    end;
  except
    on E: Exception do
      ShowError(InitErrorMsg, E.Message);
    else
      // raised object is not an Exception: no detail text available
      ShowError(InitErrorMsg, '');
  end;
end;
// Event handler for the start of detected sound: appends a marker line to the log memo.
// The parameters are not used.
procedure TfrmMain.FOnSoundStart(Sender: TObject; StreamNumber: Integer; StreamPosition: OleVariant);
const
  LogLine = 'OnSoundStart';
begin
  memLog.Lines.Add(LogLine);
end;
// Event handler for the end of detected sound: appends a marker line to the log memo.
// The parameters are not used.
procedure TfrmMain.FOnSoundEnd(Sender: TObject; StreamNumber: Integer; StreamPosition: OleVariant);
const
  LogLine = 'OnSoundEnd';
begin
  memLog.Lines.Add(LogLine);
end;
// Event handler for a recognition hypothesis: logs the text of the hypothesized phrase.
// Only Result is used; the other parameters are ignored.
procedure TfrmMain.FOnHypothesis(Sender: TObject; StreamNumber: Integer; StreamPosition: OleVariant; Result: ISpeechRecoResult);
var
  phraseText: WideString;
begin
  // all elements of the phrase (0 .. -1), with replacements applied
  phraseText := Result.PhraseInfo.GetText(0, -1, True);
  memLog.Lines.Add('OnHypothesis: ' + phraseText);
end;
// Event handler for a successful recognition: logs the recognized text together
// with the name of the grammar rule reported for the phrase.
// Only Result is used; the other parameters are ignored.
procedure TfrmMain.FOnRecognition(Sender: TObject; StreamNumber: Integer; StreamPosition: OleVariant; RecognitionType: SpeechRecognitionType; Result: ISpeechRecoResult);
var
  phraseText: WideString;
  ruleName: WideString;
begin
  // all elements of the phrase (0 .. -1), with replacements applied
  phraseText := Result.PhraseInfo.GetText(0, -1, True);
  ruleName := Result.PhraseInfo.Rule.Name;
  memLog.Lines.Add('OnRecognition: ' + phraseText + ' (Rule: ' + ruleName + ')');
end;
// Event handler for a false recognition: logs the text of the rejected phrase.
// Only Result is used; the other parameters are ignored.
procedure TfrmMain.FOnFalseRecognition(Sender: TObject; StreamNumber: Integer; StreamPosition: OleVariant; Result: ISpeechRecoResult);
var
  phraseText: WideString;
begin
  // all elements of the phrase (0 .. -1), with replacements applied
  phraseText := Result.PhraseInfo.GetText(0, -1, True);
  memLog.Lines.Add('OnFalseRecognition: ' + phraseText);
end;
end.