2017-12-09 4 views
0

私は、HiveとEMRを使用してS3アクセスログを分割するthis blog postに従っています。私は大丈夫アクセスログの小さなバケツに対してこのスクリプトを実行することができましたが、テーブルの作成、大きなバケツ(〜1.5 TB)の上に、次のエラーで失敗しますHiveテーブル、フレークなメタストア接続を作成できません

FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.thrift.transport.TTransportException 

私はハイブログを見て、何も目立たない:/ mnt/var/log/hive。このエラーはかなり一般的なので、問題が何であるかはわかりません。私はより多くのログ情報を発見し、また、デバッグモードでハイブを走った:私はかなり10分または15分

CREATE EXTERNAL TABLE IF NOT EXISTS Accesslogs(.... 

を更新した後、次のうち逐語的とスクリプトエラーのブログ記事を以下のよ。 EMRは最終的に推測で

.........
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_151] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_151] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_151] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_151] at org.apache.hadoop.util.RunJar.run(RunJar.java:221) [hadoop-common-2.7.3-amzn-5.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:136) [hadoop-common-2.7.3-amzn-5.jar:?] Caused by: java.net.ConnectException: Connection refused (Connection refused) at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_151] at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_151] at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_151] at org.apache.thrift.transport.TSocket.open(TSocket.java:221) ~[hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] ... 33 more 2017-12-10T15:18:18,718 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(506)) - Waiting 1 seconds before next connection attempt. 2017-12-10T15:18:19,719 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(392)) - Trying to connect to metastore with URI thrift://ip-172-50-31-107.ec2.internal:9083 2017-12-10T15:18:19,719 WARN [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(472)) - Failed to connect to the MetaStore Server... org.apache.thrift.transport.TTransportException: java.net.ConnectException: Connection refused (Connection refused) at org.apache.thrift.transport.TSocket.open(TSocket.java:226) ~[hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:465) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.reconnect(HiveMetaStoreClient.java:335) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:163) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at com.sun.proxy.$Proxy37.createTable(Unknown Source) [?:?] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_151] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_151] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_151] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_151] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2303) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at com.sun.proxy.$Proxy37.createTable(Unknown Source) [?:?] at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:854) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:869) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) [hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686) [hive-cli-2.3.1-amzn-0.jar:2.3.1-amzn-0] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_151] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_151] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_151] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_151] at org.apache.hadoop.util.RunJar.run(RunJar.java:221) [hadoop-common-2.7.3-amzn-5.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:136) [hadoop-common-2.7.3-amzn-5.jar:?] Caused by: java.net.ConnectException: Connection refused (Connection refused) at java.net.PlainSocketImpl.socketConnect(Native Method) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206) ~[?:1.8.0_151] at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188) ~[?:1.8.0_151] at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[?:1.8.0_151] at java.net.Socket.connect(Socket.java:589) ~[?:1.8.0_151] at org.apache.thrift.transport.TSocket.open(TSocket.java:221) ~[hive-exec-2.3.1-amzn-0.jar:2.3.1-amzn-0] ... 33 more 2017-12-10T15:18:19,720 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(506)) - Waiting 1 seconds before next connection attempt. 2017-12-10T15:18:20,721 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(392)) - Trying to connect to metastore with URI thrift://ip-172-50-31-107.ec2.internal:9083 2017-12-10T15:18:20,721 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(466)) - Opened a connection to metastore, current connections: 1 2017-12-10T15:18:20,795 INFO [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: hive.metastore (HiveMetaStoreClient.java:open(519)) - Connected to metastore. 2017-12-10T15:18:28,013 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:18:28,014 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:19:28,014 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:19:28,014 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:20:28,014 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:20:28,014 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:21:28,015 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:21:28,015 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:22:28,015 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:22:28,015 DEBUG [java-sdk-http-connection-reaper([])]: conn.PoolingHttpClientConnectionManager (PoolingHttpClientConnectionManager.java:closeIdleConnections(401)) - Closing connections idle longer than 60000 MILLISECONDS 2017-12-10T15:22:44,472 ERROR [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: exec.DDLTask (DDLTask.java:failed(639)) - org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.thrift.transport.TTransportException at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:864) at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:869) at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356) at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Caused by: org.apache.thrift.transport.TTransportException at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:132) at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_create_table_with_environment_context(ThriftHiveMetastore.java:1199) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.create_table_with_environment_context(ThriftHiveMetastore.java:1185) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.create_table_with_environment_context(HiveMetaStoreClient.java:2372) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.create_table_with_environment_context(SessionHiveMetaStoreClient.java:93) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:737) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:725) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173) at com.sun.proxy.$Proxy37.createTable(Unknown Source) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2303) at com.sun.proxy.$Proxy37.createTable(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:854) ... 22 more

2017-12-10T15:22:44,472 ERROR [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: ql.Driver (SessionState.java:printError(1126)) - FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. org.apache.thrift.transport.TTransportException 2017-12-10T15:22:44,472 DEBUG [e74af478-3227-4bf9-9fde-74d8babcf5f0 main([])]: ql.Driver (DriverContext.java:shutdown(132)) - Shutting down query CREATE EXTERNAL TABLE IF NOT EXISTS Accesslogs( BucketOwner string, Bucket string, RequestDateTime string, RemoteIP string, Requester string, RequestID string, Operation string, Key string, RequestURI_operation string, RequestURI_key string, RequestURI_httpProtoversion string, HTTPstatus string, ErrorCode string, BytesSent string, ObjectSize string, TotalTime string, TurnAroundTime string, Referrer string, UserAgent string, VersionId string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = '1',

答えて

1

を失敗した後、メタストアへの断続的な接続エラーを取得し、ハイブは、ファイルシステム(再帰treewalk、名前の変更)に高速で何かをしようとしている、それはS3になったときにオーバーキールいるとこれらのことはすべてクライアントに偽装されています。

これについてHiveに対してJIRAを提出することを検討してください。任意のログサーバー側も含めることができます。また、いくつかの異なるスケールファイルを試して、何が失敗するかを確認します。

関連する問題