0

Node.js の Google 音声 API ストリームが停止してしまう。私は https://github.com/GoogleCloudPlatform/nodejs-docs-samples/tree/master/speech から次のコード例(recognize.js)を拝借しました(これを実行するには認証が必要です):

/** 
* Copyright 2016, Google, Inc. 
* Licensed under the Apache License, Version 2.0 (the "License"); 
* you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at 
* 
* http://www.apache.org/licenses/LICENSE-2.0 
* 
* Unless required by applicable law or agreed to in writing, software 
* distributed under the License is distributed on an "AS IS" BASIS, 
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and 
* limitations under the License. 
*/ 

/** 
* This application demonstrates how to perform basic recognize operations with 
* the Google Cloud Speech API. 
* 
* For more information, see the README.md under /speech and the documentation 
* at https://cloud.google.com/speech/docs. 
*/ 

'use strict'; 

function syncRecognize (filename, encoding, sampleRate) { 
    // [START speech_sync_recognize] 
    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw 
    // const filename = '/path/to/audio.raw'; 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    encoding: encoding, 
    sampleRate: sampleRate 
    }; 

    // Detects speech in the audio file 
    speech.recognize(filename, request) 
    .then((results) => { 
     const transcription = results[0]; 

     console.log(`Transcription: ${transcription}`); 
    }); 
    // [END speech_sync_recognize] 
} 

function syncRecognizeGCS (gcsUri, encoding, sampleRate) { 
    // [START speech_sync_recognize_gcs] 
    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw 
    // const gcsUri = 'gs://my-bucket/audio.raw'; 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    encoding: encoding, 
    sampleRate: sampleRate 
    }; 

    // Detects speech in the audio file 
    speech.recognize(gcsUri, request) 
    .then((results) => { 
     const transcription = results[0]; 

     console.log(`Transcription: ${transcription}`); 
    }); 
    // [END speech_sync_recognize_gcs] 
} 

function asyncRecognize (filename, encoding, sampleRate) { 
    // [START speech_async_recognize] 
    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw 
    // const filename = '/path/to/audio.raw'; 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    encoding: encoding, 
    sampleRate: sampleRate 
    }; 

    // Detects speech in the audio file. This creates a recognition job that you 
    // can wait for now, or get its result later. 
    speech.startRecognition(filename, request) 
    .then((results) => { 
     const operation = results[0]; 
     // Get a Promise represention of the final result of the job 
     return operation.promise(); 
    }) 
    .then((transcription) => { 
     console.log(`Transcription: ${transcription}`); 
    }); 
    // [END speech_async_recognize] 
} 

function asyncRecognizeGCS (gcsUri, encoding, sampleRate) { 
    // [START speech_async_recognize_gcs] 
    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw 
    // const gcsUri = 'gs://my-bucket/audio.raw'; 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    encoding: encoding, 
    sampleRate: sampleRate 
    }; 

    // Detects speech in the audio file. This creates a recognition job that you 
    // can wait for now, or get its result later. 
    speech.startRecognition(gcsUri, request) 
    .then((results) => { 
     const operation = results[0]; 
     // Get a Promise represention of the final result of the job 
     return operation.promise(); 
    }) 
    .then((transcription) => { 
     console.log(`Transcription: ${transcription}`); 
    }); 
    // [END speech_async_recognize_gcs] 
} 

function streamingRecognize (filename, encoding, sampleRate) { 
    // [START speech_streaming_recognize] 
    const fs = require('fs'); 

    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw 
    // const filename = '/path/to/audio.raw'; 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    config: { 
     encoding: encoding, 
     sampleRate: sampleRate 
    } 
    }; 

    // Stream the audio to the Google Cloud Speech API 
    const recognizeStream = speech.createRecognizeStream(request) 
    .on('error', console.error) 
    .on('data', (data) => { 
     console.log('Data received: %j', data); 
    }); 

    // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw" 
    fs.createReadStream(filename).pipe(recognizeStream); 
    // [END speech_streaming_recognize] 
} 

function streamingMicRecognize (encoding, sampleRate) { 
    // [START speech_streaming_mic_recognize] 
    const record = require('node-record-lpcm16'); 

    // Imports the Google Cloud client library 
    const Speech = require('@google-cloud/speech'); 

    // Instantiates a client 
    const speech = Speech(); 

    // The encoding of the audio file, e.g. 'LINEAR16' 
    // const encoding = 'LINEAR16'; 

    // The sample rate of the audio file, e.g. 16000 
    // const sampleRate = 16000; 

    const request = { 
    config: { 
     encoding: encoding, 
     sampleRate: sampleRate 
    } 
    }; 

    // Create a recognize stream 
    const recognizeStream = speech.createRecognizeStream(request) 
    .on('error', console.error) 
    .on('data', (data) => process.stdout.write(data.results)); 

    // Start recording and send the microphone input to the Speech API 
    record.start({ 
    sampleRate: sampleRate, 
    threshold: 0 
    }).pipe(recognizeStream); 

    console.log('Listening, press Ctrl+C to stop.'); 
    // [END speech_streaming_mic_recognize] 
} 

// Command-line interface. Exactly one sub-command is demanded; each entry
// below maps a usage pattern to the sample function that implements it. The
// handlers receive the parsed options, including the global `encoding` (-e)
// and `sampleRate` (-r) flags declared further down.
const cliCommands = [
    {
        usage: `sync <filename>`,
        description: `Detects speech in a local audio file.`,
        handler: (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate)
    },
    {
        usage: `sync-gcs <gcsUri>`,
        description: `Detects speech in an audio file located in a Google Cloud Storage bucket.`,
        handler: (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
    },
    {
        usage: `async <filename>`,
        description: `Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
        handler: (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate)
    },
    {
        usage: `async-gcs <gcsUri>`,
        description: `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
        handler: (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
    },
    {
        usage: `stream <filename>`,
        description: `Detects speech in a local audio file by streaming it to the Speech API.`,
        handler: (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate)
    },
    {
        usage: `listen`,
        description: `Detects speech in a microphone input stream.`,
        handler: (opts) => streamingMicRecognize(opts.encoding, opts.sampleRate)
    }
];

// Flags shared by every sub-command.
const cliOptions = {
    encoding: {
        alias: 'e',
        default: 'LINEAR16',
        global: true,
        requiresArg: true,
        type: 'string'
    },
    sampleRate: {
        alias: 'r',
        default: 16000,
        global: true,
        requiresArg: true,
        type: 'number'
    }
};

// Register the commands in declaration order by threading the parser through
// each `.command()` call, then finish the chain; reading `.argv` is the last
// step of the chain.
cliCommands
    .reduce(
        (parser, entry) => parser.command(entry.usage, entry.description, {}, entry.handler),
        require(`yargs`).demand(1)
    )
    .options(cliOptions)
    .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
    .example(`node $0 async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000`)
    .example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
    .example(`node $0 listen`)
    .wrap(120)
    .recommendCommands()
    .epilogue(`For more information, see https://cloud.google.com/speech/docs`)
    .help()
    .strict()
    .argv;

次のコマンドで実行しています: node recognize.js listen

この例は、最初のうちは動作します。マイクに向かって話すと、話した内容がコンソールに出力されます。問題は、数秒間黙っていると、聞き取り/認識/出力が停止してしまうことです。さまざまなファイルを調べましたが、その理由が分かりません。

答えて

0

100%確実ではありませんが、いくつかの原因が考えられそうです:

  1. リクエストが「単一の発話(single utterance)」を要求している(StreamingRecognitionConfig を参照)。

    デフォルトは false のようなので、これが原因である可能性は低いと思われます。ただし、リクエストで明示的に指定しても害はありません。

  2. クライアント側で定義されたタイムアウト(createRecognizeStream を参照)に達している。

    これも可能性は低いと思います。60秒間ずっと話し続けていたわけではないでしょうから。

  3. マイク側がストリームを閉じており、それが Speech クライアントに伝播している。

    これはもう少し可能性が高いかもしれませんが、100%の自信はありません。

最後の項目を切り分けるには、マイクに向かって話した内容をファイルに録音し、(ライブのオーディオストリームではなく)そのファイルを createRecognizeStream メソッド経由で送信してみてください。それでこの問題を再現できれば、診断が容易になります。

+0

また、この例では node-record-lpcm16 モジュールを使用しています。そのドキュメントに記載されているとおり、このライブラリは無音になると自動的に録音を停止しようとします。 –

関連する問題