2017-12-20

Google Cloud Dataflow INTERNAL: GOAWAY received

We are using Google Cloud Dataflow to build a cache of data that speeds up our web requests. The dataset, and how it has to be grouped, is somewhat outside our control, so we are doing some fairly unorthodox things. In any case, we have been getting the error below on every run for a while now, although the job does occasionally run and succeed. Job ID: 2017-12-19_22_30_10-4314752451342817881

java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: unexpected 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:182) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner$1.outputWindowedValue(GroupAlsoByWindowFnRunner.java:104) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn.processElement(BatchGroupAlsoByWindowViaIteratorsFn.java:121) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn.processElement(BatchGroupAlsoByWindowViaIteratorsFn.java:53) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:117) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:74) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:113) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:187) 
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:148) 
at com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:68) 
at com.google.cloud.dataflow.worker.DataflowWorker.executeWork(DataflowWorker.java:330) 
at com.google.cloud.dataflow.worker.DataflowWorker.doWork(DataflowWorker.java:302) 
at com.google.cloud.dataflow.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:251) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:135) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:115) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:102) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 
Caused by: org.apache.beam.sdk.util.UserCodeException: java.lang.RuntimeException: unexpected 
at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:36) 
at com.monsanto.product360.beam.dataflow.materialviews.groupers.CabnAuspGrouper$DoFnInvoker.invokeProcessElement(Unknown Source) 
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177) 
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141) 
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:272) 
at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:211) 
at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:66) 
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:436) 
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:424) 
at org.apache.beam.sdk.transforms.join.CoGroupByKey$ConstructCoGbkResultFn.processElement(CoGroupByKey.java:206) 
at org.apache.beam.sdk.transforms.join.CoGroupByKey$ConstructCoGbkResultFn$DoFnInvoker.invokeProcessElement(Unknown Source) 
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177) 
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141) 
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:180) 
... 21 more 
Caused by: java.lang.RuntimeException: unexpected 
at com.google.cloud.dataflow.worker.util.common.worker.CachingShuffleBatchReader.read(CachingShuffleBatchReader.java:79) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.fillEntries(BatchingShuffleEntryReader.java:133) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.fillEntriesIfNeeded(BatchingShuffleEntryReader.java:126) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.hasNext(BatchingShuffleEntryReader.java:90) 
at com.google.cloud.dataflow.worker.util.common.ForwardingReiterator.hasNext(ForwardingReiterator.java:62) 
at com.google.cloud.dataflow.worker.util.common.worker.GroupingShuffleEntryIterator$ValuesIterator.advance(GroupingShuffleEntryIterator.java:283) 
at com.google.cloud.dataflow.worker.util.common.worker.GroupingShuffleEntryIterator$ValuesIterator.hasNext(GroupingShuffleEntryIterator.java:278) 
at com.google.cloud.dataflow.worker.GroupingShuffleReader$GroupingShuffleReaderIterator$ValuesIterator.hasNext(GroupingShuffleReader.java:357) 
at org.apache.beam.runners.core.PeekingReiterator.computeNext(PeekingReiterator.java:94) 
at org.apache.beam.runners.core.PeekingReiterator.hasNext(PeekingReiterator.java:48) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn$WindowReiterator.skipToValidElement(BatchGroupAlsoByWindowViaIteratorsFn.java:226) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn$WindowReiterator.hasNext(BatchGroupAlsoByWindowViaIteratorsFn.java:202) 
at org.apache.beam.sdk.repackaged.com.google.common.collect.Iterators$PeekingImpl.hasNext(Iterators.java:1105) 
at org.apache.beam.sdk.transforms.join.CoGbkResult$UnionValueIterator.advance(CoGbkResult.java:430) 
at org.apache.beam.sdk.transforms.join.CoGbkResult$UnionValueIterator.hasNext(CoGbkResult.java:407) 
at org.apache.beam.sdk.repackaged.com.google.common.collect.MultitransformedIterator.hasNext(MultitransformedIterator.java:47) 
at com.monsanto.product360.beam.dataflow.materialviews.groupers.CabnAuspGrouper.processElement(CabnAuspGrouper.java:35) 
Caused by: java.util.concurrent.ExecutionException: java.io.IOException: INTERNAL: GOAWAY received 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.AbstractFuture.getDoneValue(AbstractFuture.java:500) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:459) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.AbstractFuture$TrustedFuture.get(AbstractFuture.java:76) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:142) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2373) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2337) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2295) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2208) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache.get(LocalCache.java:4053) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4057) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4986) 
at com.google.cloud.dataflow.worker.util.common.worker.CachingShuffleBatchReader.read(CachingShuffleBatchReader.java:76) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.fillEntries(BatchingShuffleEntryReader.java:133) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.fillEntriesIfNeeded(BatchingShuffleEntryReader.java:126) 
at com.google.cloud.dataflow.worker.util.common.worker.BatchingShuffleEntryReader$ShuffleReadIterator.hasNext(BatchingShuffleEntryReader.java:90) 
at com.google.cloud.dataflow.worker.util.common.ForwardingReiterator.hasNext(ForwardingReiterator.java:62) 
at com.google.cloud.dataflow.worker.util.common.worker.GroupingShuffleEntryIterator$ValuesIterator.advance(GroupingShuffleEntryIterator.java:283) 
at com.google.cloud.dataflow.worker.util.common.worker.GroupingShuffleEntryIterator$ValuesIterator.hasNext(GroupingShuffleEntryIterator.java:278) 
at com.google.cloud.dataflow.worker.GroupingShuffleReader$GroupingShuffleReaderIterator$ValuesIterator.hasNext(GroupingShuffleReader.java:357) 
at org.apache.beam.runners.core.PeekingReiterator.computeNext(PeekingReiterator.java:94) 
at org.apache.beam.runners.core.PeekingReiterator.hasNext(PeekingReiterator.java:48) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn$WindowReiterator.skipToValidElement(BatchGroupAlsoByWindowViaIteratorsFn.java:226) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn$WindowReiterator.hasNext(BatchGroupAlsoByWindowViaIteratorsFn.java:202) 
at org.apache.beam.sdk.repackaged.com.google.common.collect.Iterators$PeekingImpl.hasNext(Iterators.java:1105) 
at org.apache.beam.sdk.transforms.join.CoGbkResult$UnionValueIterator.advance(CoGbkResult.java:430) 
at org.apache.beam.sdk.transforms.join.CoGbkResult$UnionValueIterator.hasNext(CoGbkResult.java:407) 
at org.apache.beam.sdk.repackaged.com.google.common.collect.MultitransformedIterator.hasNext(MultitransformedIterator.java:47) 
at com.monsanto.product360.beam.dataflow.materialviews.groupers.CabnAuspGrouper.processElement(CabnAuspGrouper.java:35) 
at com.monsanto.product360.beam.dataflow.materialviews.groupers.CabnAuspGrouper$DoFnInvoker.invokeProcessElement(Unknown Source) 
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177) 
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141) 
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:272) 
at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:211) 
at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:66) 
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:436) 
at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:424) 
at org.apache.beam.sdk.transforms.join.CoGroupByKey$ConstructCoGbkResultFn.processElement(CoGroupByKey.java:206) 
at org.apache.beam.sdk.transforms.join.CoGroupByKey$ConstructCoGbkResultFn$DoFnInvoker.invokeProcessElement(Unknown Source) 
at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:177) 
at org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:141) 
at com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:324) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1.output(GroupAlsoByWindowsParDoFn.java:180) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner$1.outputWindowedValue(GroupAlsoByWindowFnRunner.java:104) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn.processElement(BatchGroupAlsoByWindowViaIteratorsFn.java:121) 
at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindowViaIteratorsFn.processElement(BatchGroupAlsoByWindowViaIteratorsFn.java:53) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.invokeProcessElement(GroupAlsoByWindowFnRunner.java:117) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.processElement(GroupAlsoByWindowFnRunner.java:74) 
at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn.processElement(GroupAlsoByWindowsParDoFn.java:113) 
at com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:48) 
at com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:52) 
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:187) 
at com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:148) 
at com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:68) 
at com.google.cloud.dataflow.worker.DataflowWorker.executeWork(DataflowWorker.java:330) 
at com.google.cloud.dataflow.worker.DataflowWorker.doWork(DataflowWorker.java:302) 
at com.google.cloud.dataflow.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:251) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:135) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:115) 
at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:102) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:745) 
Caused by: java.io.IOException: INTERNAL: GOAWAY received 
at com.google.cloud.dataflow.worker.ApplianceShuffleReader.readIncludingPosition(Native Method) 
at com.google.cloud.dataflow.worker.ChunkingShuffleBatchReader.read(ChunkingShuffleBatchReader.java:62) 
at com.google.cloud.dataflow.worker.util.common.worker.CachingShuffleBatchReader$1.load(CachingShuffleBatchReader.java:57) 
at com.google.cloud.dataflow.worker.util.common.worker.CachingShuffleBatchReader$1.load(CachingShuffleBatchReader.java:53) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3628) 
at com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2336) 
... 62 more 
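
For context, the trace bottoms out in CabnAuspGrouper.processElement while it iterates the CoGbkResult produced by a CoGroupByKey, so the failing stage presumably looks something like the sketch below (Beam Java SDK). The JoinSketch class, tag names, and String element types are assumptions made purely for illustration, not the actual pipeline code.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.join.CoGbkResult;
import org.apache.beam.sdk.transforms.join.CoGroupByKey;
import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TupleTag;

public class JoinSketch {
  // Tags distinguishing the two keyed inputs inside the CoGbkResult.
  static final TupleTag<String> CABN_TAG = new TupleTag<String>() {};
  static final TupleTag<String> AUSP_TAG = new TupleTag<String>() {};

  static PCollection<KV<String, String>> join(
      PCollection<KV<String, String>> cabn, PCollection<KV<String, String>> ausp) {
    return KeyedPCollectionTuple.of(CABN_TAG, cabn)
        .and(AUSP_TAG, ausp)
        .apply(CoGroupByKey.create())
        .apply(ParDo.of(new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
          @ProcessElement
          public void processElement(ProcessContext c) {
            CoGbkResult joined = c.element().getValue();
            // The grouped values are fetched in batches from shuffle
            // (CachingShuffleBatchReader in the trace); iterating them is
            // where the "GOAWAY received" IOException surfaces.
            for (String cabnRow : joined.getAll(CABN_TAG)) {
              for (String auspRow : joined.getAll(AUSP_TAG)) {
                c.output(KV.of(c.element().getKey(), cabnRow + "|" + auspRow));
              }
            }
          }
        }));
  }
}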

Answer


This looks like a transient network error. Transient errors are a fact of life in distributed systems, so Dataflow retries a failed work item several times (but still logs the failures, since they are sometimes useful). If your job ultimately succeeds, there is nothing to worry about.


Thanks for the reply. We do keep shuffling a lot of data. This error did not show up until recently, when we added another step to the job that increased shuffle and memory usage. We had been working under the same assumption you outlined; I wanted to post here because I was not sure whether this error hints at something that will get really bad as we keep adding more and more data. –


If you are shuffling very large amounts of data, you could try the Shuffle service: https://cloud.google.com/blog/big-data/2017/06/introducing-cloud-dataflow-shuffle- (the blog post introducing Cloud Dataflow Shuffle, for up to 5x performance improvement in data analytic pipelines) – jkff
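
In case it helps anyone else, here is a minimal sketch of how that service-based shuffle could be enabled from the Java SDK, assuming the 2017-era DataflowPipelineOptions; the shuffle_mode=service experiment is the flag documented for Dataflow Shuffle at the time, so check the current docs before relying on it.

import java.util.Arrays;
import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class ShuffleServiceExample {
  public static void main(String[] args) {
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    // Route batch shuffle through the Dataflow Shuffle service instead of the
    // worker-local (appliance) shuffle that ApplianceShuffleReader in the
    // trace is reading from.
    options.setExperiments(Arrays.asList("shuffle_mode=service"));

    Pipeline p = Pipeline.create(options);
    // ... build the rest of the pipeline here, then:
    p.run();
  }
}

The same thing can be passed on the command line as --experiments=shuffle_mode=service.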
