Iceberg 表文件结构
一张 Iceberg 表的文件组织结构如下所示（表根目录下包含 data 和 metadata 两个子目录）：
[root@abc-10-10-10-11 ~]# hadoop fs -ls /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/data
drwxr-xr-x - hadoop supergroup 0 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata
数据目录(data)
数据目录结构如下所示:
[root@abc-10-10-10-11 ~]# hadoop fs -ls /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/data
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/data/dt=20220501
[root@abc-10-10-10-11 ~]# hadoop fs -ls /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/data/dt=20220501
Found 1 items
-rw-r--r-- 3 hadoop supergroup 670 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/data/dt=20220501/00000-1-131a29fa-741a-4c85-91c4-1fd23a23578a-0-00001.parquet
元数据目录(metadata)
元数据目录结构如下所示，其中包含各版本的表元数据文件（*.metadata.json）、manifest 文件（*-m0.avro）和 manifest list 文件（snap-*.avro）：
[root@abc-10-10-10-11 ~]# hadoop fs -ls /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata
Found 4 items
-rw-r--r-- 3 hadoop supergroup 1176 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/00000-40c5199d-b380-4675-afdb-74e028ea6559.metadata.json
-rw-r--r-- 3 hadoop supergroup 2588 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/00001-fd678b94-eaf3-4993-bcb6-5c90fac4fdfc.metadata.json
-rw-r--r-- 3 hadoop supergroup 6914 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/eb1817e4-01f3-401e-a0d5-2a62e907dbda-m0.avro
-rw-r--r-- 3 hadoop supergroup 4498 2025-06-02 12:03 /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/snap-4502910375816569575-1-eb1817e4-01f3-401e-a0d5-2a62e907dbda.avro
元数据版本(00000-{UUID}.metadata.json)
[root@abc-10-10-10-11 ~]# hadoop fs -cat /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/00000-40c5199d-b380-4675-afdb-74e028ea6559.metadata.json
{
"format-version" : 2,
"table-uuid" : "68f5b114-bcac-4ea2-94b4-b0ef0697217c",
"location" : "hdfs://ABC0000001/data/hive/warehouse/iceberg_test_db.db/spark_hive_partition",
"last-sequence-number" : 0,
"last-updated-ms" : 1750651393777,
"last-column-id" : 2,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "id",
"required" : false,
"type" : "long"
}, {
"id" : 2,
"name" : "dt",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "dt",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "hadoop",
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : -1,
"refs" : { },
"snapshots" : [ ],
"statistics" : [ ],
"partition-statistics" : [ ],
"snapshot-log" : [ ],
"metadata-log" : [ ]
}
元数据版本(00001-{UUID}.metadata.json)
[root@abc-10-10-10-11 ~]# hadoop fs -cat /data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/00001-fd678b94-eaf3-4993-bcb6-5c90fac4fdfc.metadata.json
{
"format-version" : 2,
"table-uuid" : "68f5b114-bcac-4ea2-94b4-b0ef0697217c",
"location" : "hdfs://ABC0000001/data/hive/warehouse/iceberg_test_db.db/spark_hive_partition",
"last-sequence-number" : 1,
"last-updated-ms" : 1750651398083,
"last-column-id" : 2,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "id",
"required" : false,
"type" : "long"
}, {
"id" : 2,
"name" : "dt",
"required" : false,
"type" : "string"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "dt",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"owner" : "hadoop",
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : 4502910375816569575,
"refs" : {
"main" : {
"snapshot-id" : 4502910375816569575,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 4502910375816569575,
"timestamp-ms" : 1750651398083,
"summary" : {
"operation" : "append",
"spark.app.id" : "application_1750612138448_0357",
"added-data-files" : "1",
"added-records" : "1",
"added-files-size" : "670",
"changed-partition-count" : "1",
"total-records" : "1",
"total-files-size" : "670",
"total-data-files" : "1",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0",
"engine-version" : "3.5.2",
"app-id" : "application_1750612138448_0357",
"engine-name" : "spark",
"iceberg-version" : "Apache Iceberg 1.6.1"
},
"manifest-list" : "hdfs://ABC0000001/data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/snap-4502910375816569575-1-eb1817e4-01f3-401e-a0d5-2a62e907dbda.avro",
"schema-id" : 0
} ],
"statistics" : [ ],
"partition-statistics" : [ ],
"snapshot-log" : [ {
"timestamp-ms" : 1750651398083,
"snapshot-id" : 4502910375816569575
} ],
"metadata-log" : [ {
"timestamp-ms" : 1750651393777,
"metadata-file" : "hdfs://ABC0000001/data/hive/warehouse/iceberg_test_db.db/spark_hive_partition/metadata/00000-40c5199d-b380-4675-afdb-74e028ea6559.metadata.json"
} ]
}