import java.util.HashMap

import org.apache.iceberg.hive.HiveCatalog

val catalog = new HiveCatalog()
catalog.setConf(spark.sparkContext.hadoopConfiguration) // optionally reuse Spark's Hadoop configuration

val properties = new HashMap[String, String]()
properties.put("warehouse", "/hive/warehouse")

catalog.initialize("hive", properties)
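As a quick sanity check that the catalog is wired to the metastore, the tables under the database can be listed; a minimal sketch, assuming the hudi_ods database already exists:

import scala.collection.JavaConverters._

import org.apache.iceberg.catalog.Namespace

// List the Iceberg tables registered under the hudi_ods database
catalog.listTables(Namespace.of("hudi_ods")).asScala.foreach(println)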
import org.apache.iceberg.Table
import org.apache.iceberg.catalog.TableIdentifier

val name = TableIdentifier.of("hudi_ods", "t_asset_file_last")
//val name = TableIdentifier.of("hudi_ods", "drs_content")
val table = catalog.loadTable(name)
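Before expiring anything, it can be useful to see which snapshots the table currently holds; a short sketch using Table.snapshots() (JavaConverters was imported above):

// Print each snapshot's id, commit timestamp, and operation before expiring
table.snapshots().asScala.foreach { s =>
  println(s"snapshot=${s.snapshotId()} committedAt=${s.timestampMillis()} operation=${s.operation()}")
}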
val tsToExpire = System.currentTimeMillis() - (1000L * 60 * 60 * 24 * 5) // 5 days
table.expireSnapshots().expireOlderThan(tsToExpire).commit()
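If some recent history should survive regardless of age, expireOlderThan can be combined with retainLast; a sketch, assuming keeping the two most recent snapshots is acceptable:

// Expire snapshots older than the cutoff, but always retain the 2 most recent ones
table.expireSnapshots()
  .expireOlderThan(tsToExpire)
  .retainLast(2)
  .commit()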
// Inspect a snapshot's manifest list directly by reading its metadata Avro file
val data = spark.read.format("avro").load("hdfs://bicluster/hive/warehouse/hudi_ods.db/t_asset_file_last_v2/metadata/snap-4703358464354234152-1-9930e33d-1861-4149-adbd-68cc8838ef11.avro")
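To see which manifests this snapshot references, print the schema and the rows; assuming the file follows Iceberg's manifest-list layout, the manifest_path column names each manifest file:

// Print the manifest-list schema and the path of each referenced manifest
data.printSchema()
data.select("manifest_path").show(false)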