28#include <unordered_map>
31#include "iceberg/iceberg_export.h"
32#include "iceberg/table_properties.h"
36#include "iceberg/util/timepoint.h"
48 return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id;
60 return lhs.timestamp_ms == rhs.timestamp_ms && lhs.metadata_file == rhs.metadata_file;
73 static constexpr int8_t kDefaultTableFormatVersion = 2;
74 static constexpr int8_t kSupportedTableFormatVersion = 3;
75 static constexpr int8_t kMinFormatVersionRowLineage = 3;
76 static constexpr int8_t kMinFormatVersionDefaultValues = 3;
77 static constexpr int64_t kInitialSequenceNumber = 0;
78 static constexpr int64_t kInitialRowId = 0;
80 static inline const std::unordered_map<TypeId, int8_t> kMinFormatVersions = {};
95 std::vector<std::shared_ptr<iceberg::Schema>>
schemas;
109 std::vector<std::shared_ptr<iceberg::Snapshot>>
snapshots;
121 std::unordered_map<std::string, std::shared_ptr<SnapshotRef>>
refs;
123 std::vector<std::shared_ptr<struct StatisticsFile>>
statistics;
129 static Result<std::unique_ptr<TableMetadata>> Make(
132 const std::unordered_map<std::string, std::string>& properties,
133 int format_version = kDefaultTableFormatVersion);
137 Result<std::shared_ptr<iceberg::Schema>>
Schema()
const;
140 Result<std::shared_ptr<iceberg::Schema>> SchemaById(int32_t schema_id)
const;
143 Result<std::shared_ptr<iceberg::PartitionSpec>>
PartitionSpec()
const;
146 Result<std::shared_ptr<iceberg::PartitionSpec>> PartitionSpecById(
147 int32_t spec_id)
const;
150 Result<std::shared_ptr<iceberg::SortOrder>>
SortOrder()
const;
153 Result<std::shared_ptr<iceberg::SortOrder>> SortOrderById(int32_t sort_order_id)
const;
156 Result<std::shared_ptr<iceberg::Snapshot>>
Snapshot()
const;
159 Result<std::shared_ptr<iceberg::Snapshot>> SnapshotById(int64_t snapshot_id)
const;
161 int64_t NextSequenceNumber()
const;
163 ICEBERG_EXPORT
friend bool operator==(
const TableMetadata& lhs,
172 template <
typename T>
173 using ByIdMap = std::unordered_map<int32_t, std::shared_ptr<T>>;
174 using SchemasMap = ByIdMap<Schema>;
175 using PartitionSpecsMap = ByIdMap<PartitionSpec>;
176 using SortOrdersMap = ByIdMap<SortOrder>;
177 using SnapshotsMap = std::unordered_map<int64_t, std::shared_ptr<Snapshot>>;
178 using SchemasMapRef = std::reference_wrapper<const SchemasMap>;
179 using PartitionSpecsMapRef = std::reference_wrapper<const PartitionSpecsMap>;
180 using SortOrdersMapRef = std::reference_wrapper<const SortOrdersMap>;
181 using SnapshotsMapRef = std::reference_wrapper<const SnapshotsMap>;
183 Result<SchemasMapRef> GetSchemasById()
const;
184 Result<PartitionSpecsMapRef> GetPartitionSpecsById()
const;
185 Result<SortOrdersMapRef> GetSortOrdersById()
const;
186 Result<SnapshotsMapRef> GetSnapshotsById()
const;
189 static Result<SchemasMap> InitSchemasMap(
const TableMetadata* metadata);
190 static Result<PartitionSpecsMap> InitPartitionSpecsMap(
const TableMetadata* metadata);
191 static Result<SortOrdersMap> InitSortOrdersMap(
const TableMetadata* metadata);
192 static Result<SnapshotsMap> InitSnapshotMap(
const TableMetadata* metadata);
195 Lazy<InitSchemasMap> schemas_map_;
196 Lazy<InitPartitionSpecsMap> partition_specs_map_;
197 Lazy<InitSortOrdersMap> sort_orders_map_;
198 Lazy<InitSnapshotMap> snapshot_map_;
226 static std::unique_ptr<TableMetadataBuilder> BuildFromEmpty(
227 int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
234 static std::unique_ptr<TableMetadataBuilder> BuildFrom(
const TableMetadata* base);
253 std::string_view previous_metadata_location);
279 int32_t new_last_column_id);
360 const std::string& branch);
380 const std::vector<std::shared_ptr<Snapshot>>& snapshots_to_remove);
414 const std::shared_ptr<PartitionStatisticsFile>& partition_statistics_file);
427 const std::unordered_map<std::string, std::string>& updated);
456 Result<std::unique_ptr<TableMetadata>> Build();
459 const std::vector<std::unique_ptr<TableUpdate>>& changes()
const;
491enum class ICEBERG_EXPORT MetadataFileCodecType {
503 static Result<MetadataFileCodecType> FromString(std::string_view name);
509 static Result<MetadataFileCodecType> FromFileName(std::string_view file_name);
514 static Result<std::string> NameToFileExtension(std::string_view codec);
519 static std::string TypeToFileExtension(MetadataFileCodecType codec);
521 static constexpr std::string_view kTableMetadataFileSuffix =
".metadata.json";
522 static constexpr std::string_view kCompGzipTableMetadataFileSuffix =
524 static constexpr std::string_view kGzipTableMetadataFileSuffix =
".gz.metadata.json";
525 static constexpr std::string_view kGzipTableMetadataFileExtension =
".gz";
526 static constexpr std::string_view kCodecTypeGzip =
"GZIP";
527 static constexpr std::string_view kCodecTypeNone =
"NONE";
536 static Result<std::unique_ptr<TableMetadata>> Read(
537 class FileIO& io,
const std::string& location,
538 std::optional<size_t> length = std::nullopt);
551 const std::string& base_metadata_location,
571 static Status Write(
FileIO& io,
const std::string& location,
584 static int32_t ParseVersionFromLocation(std::string_view metadata_location);
591 static Result<std::string> NewTableMetadataFilePath(
const TableMetadata& metadata,
599struct hash<iceberg::MetadataLogEntry> {
Base class for collecting errors in the builder pattern.
Definition error_collector.h:93
Pluggable module for reading, writing, and deleting files.
Definition file_io.h:115
A partition spec for a Table.
Definition partition_spec.h:47
A schema for a Table.
Definition schema.h:49
A sort order for a table.
Definition sort_order.h:40
Table properties for Iceberg tables.
Definition table_properties.h:37
ICEBERG_EXPORT const std::shared_ptr< UuidType > & uuid()
Return a UuidType instance.
Represents a metadata log entry.
Definition table_metadata.h:53
std::string metadata_file
Metadata file location.
Definition table_metadata.h:57
TimePointMs timestamp_ms
The timestamp in milliseconds of the change.
Definition table_metadata.h:55
Represents a snapshot log entry.
Definition table_metadata.h:41
TimePointMs timestamp_ms
The timestamp in milliseconds of the change.
Definition table_metadata.h:43
int64_t snapshot_id
ID of the snapshot.
Definition table_metadata.h:45
A snapshot of the data in a table at a point in time.
Definition snapshot.h:389