Bug Description
What happened:
get_table has been removed from hive4.0.1, causing hive related errors during table writes.
What you expected:
Steps to reproduce:
export HUDI_WS=$(pwd)
docker compose \
-f docker/compose/docker-compose_hadoop340_hive401_spark402_arm64.yml \
up
docker compose \
-f docker/compose/docker-compose_hadoop340_hive401_spark402_arm64.yml \
exec adhoc-1 bash -lc '
spark-shell \
--jars /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar \
--master local[2] \
--driver-class-path /etc/hadoop \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog \
--conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \
--conf spark.sql.hive.metastore.version=4.0.1 \
--conf spark.sql.hive.metastore.jars=path \
--conf spark.sql.hive.metastore.jars.path=file:///opt/hive/lib/* \
--conf spark.sql.hive.metastore.sharedPrefixes=com.mysql.jdbc,org.postgresql,com.microsoft.sqlserver,oracle.jdbc,org.apache.hudi \
--deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1'
spark.sql("""
CREATE TABLE variant_test (
id LONG,
name STRING,
variant_data VARIANT,
dt STRING
) USING hudi
PARTITIONED BY (dt)
LOCATION '/user/hive/warehouse/variant_test'
TBLPROPERTIES (
'primaryKey' = 'id',
'preCombineField' = 'name',
'hoodie.datasource.hive_sync.enable' = 'true',
'hoodie.datasource.hive_sync.database' = 'default',
'hoodie.datasource.hive_sync.table' = 'variant_test',
'hoodie.datasource.hive_sync.jdbcurl' = 'jdbc:hive2://hiveserver:10000/',
'hoodie.datasource.hive_sync.mode' = 'hms',
'hoodie.datasource.hive_sync.partition_fields' = 'dt',
'hoodie.datasource.hive_sync.partition_extractor_class' = 'org.apache.hudi.hive.MultiPartKeysValueExtractor',
'hoodie.datasource.hive_sync.username' = 'hive',
'hoodie.datasource.hive_sync.password' = 'hive'
)
""")
spark.sql("""
INSERT INTO variant_test VALUES
(1, 'row1', parse_json('{"key":"value1"}'), '2024-01-01'),
(2, 'row2', parse_json('{"key":"value2"}'), '2024-01-01')
""")
Environment
Hudi version:
Query engine: (Spark/Flink/Trino etc)
Spark4.0.2, Hive4.0.1.
Relevant configs:
Logs and Stack Trace
Caused by: org.apache.hudi.exception.HoodieException: Got runtime exception when hive syncing variant_test
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:183)
at org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:104)
at org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:72)
at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$metaSync$2(HoodieSparkSqlWriter.scala:940)
at scala.collection.mutable.HashSet$Node.foreach(HashSet.scala:450)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:376)
at org.apache.hudi.HoodieSparkSqlWriterInternal.metaSync(HoodieSparkSqlWriter.scala:938)
at org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1041)
at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:565)
at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:188)
at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:206)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:128)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand$.run(InsertIntoHoodieTableCommand.scala:123)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand.run(InsertIntoHoodieTableCommand.scala:73)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:117)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:115)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:129)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:272)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:125)
at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:295)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:124)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:237)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
at scala.util.Try$.apply(Try.scala:217)
at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
... 54 more
Caused by: org.apache.hudi.hive.HoodieHiveSyncException: Failed to check if table exists variant_test
at org.apache.hudi.hive.HoodieHiveSyncClient.tableExists(HoodieHiveSyncClient.java:469)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:241)
at org.apache.hudi.hive.HiveSyncTool.doSync(HiveSyncTool.java:195)
at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:180)
... 104 more
Caused by: org.apache.thrift.TApplicationException: Invalid method name: 'get_table'
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:79)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_table(ThriftHiveMetastore.java:1487)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_table(ThriftHiveMetastore.java:1473)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.tableExists(HiveMetaStoreClient.java:1469)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.tableExists(SessionHiveMetaStoreClient.java:266)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.base/java.lang.reflect.Method.invoke(Unknown Source)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173)
at jdk.proxy2/jdk.proxy2.$Proxy68.tableExists(Unknown Source)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.base/java.lang.reflect.Method.invoke(Unknown Source)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2349)
at jdk.proxy2/jdk.proxy2.$Proxy68.tableExists(Unknown Source)
at org.apache.hudi.hive.HoodieHiveSyncClient.tableExists(HoodieHiveSyncClient.java:464)
... 107 more
Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
at org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:106)
at org.apache.hudi.sync.common.util.SyncUtilHelpers.runHoodieMetaSync(SyncUtilHelpers.java:72)
at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$metaSync$2(HoodieSparkSqlWriter.scala:940)
at scala.collection.mutable.HashSet$Node.foreach(HashSet.scala:450)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:376)
at org.apache.hudi.HoodieSparkSqlWriterInternal.metaSync(HoodieSparkSqlWriter.scala:938)
at org.apache.hudi.HoodieSparkSqlWriterInternal.commitAndPerformPostOperations(HoodieSparkSqlWriter.scala:1041)
at org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:565)
at org.apache.hudi.HoodieSparkSqlWriterInternal.$anonfun$write$1(HoodieSparkSqlWriter.scala:188)
at org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:206)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:128)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand$.run(InsertIntoHoodieTableCommand.scala:123)
at org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand.run(InsertIntoHoodieTableCommand.scala:73)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:117)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:115)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:129)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:272)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:125)
at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:295)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:124)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:237)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
at scala.util.Try$.apply(Try.scala:217)
at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
... 54 more
Bug Description
What happened:
get_tablehas been removed fromhive4.0.1, causing hive related errors during table writes.What you expected:
Steps to reproduce:
Environment
Hudi version:
Query engine: (Spark/Flink/Trino etc)
Spark4.0.2, Hive4.0.1.
Relevant configs:
Logs and Stack Trace