iceberg-cpp
Loading...
Searching...
No Matches
table_metadata.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <memory>
26#include <string>
27#include <string_view>
28#include <unordered_map>
29#include <vector>
30
31#include "iceberg/iceberg_export.h"
32#include "iceberg/table_properties.h"
33#include "iceberg/type_fwd.h"
35#include "iceberg/util/lazy.h"
36#include "iceberg/util/timepoint.h"
37
38namespace iceberg {
39
/// \brief Represents a snapshot log entry.
///
/// A plain value type pairing a point in time with a snapshot ID.
41struct ICEBERG_EXPORT SnapshotLogEntry {
  /// The timestamp in milliseconds of the change.
43 TimePointMs timestamp_ms;
  /// ID of the snapshot.
45 int64_t snapshot_id;
46
  /// Two entries compare equal only when both the timestamp and the snapshot
  /// ID match.
47 friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) {
48 return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id;
49 }
50};
51
/// \brief Represents a metadata log entry.
///
/// A plain value type pairing a point in time with a metadata file location.
53struct ICEBERG_EXPORT MetadataLogEntry {
  /// The timestamp in milliseconds of the change.
55 TimePointMs timestamp_ms;
  /// Metadata file location.
57 std::string metadata_file;
58
  /// Two entries compare equal only when both the timestamp and the metadata
  /// file location match.
59 friend bool operator==(const MetadataLogEntry& lhs, const MetadataLogEntry& rhs) {
60 return lhs.timestamp_ms == rhs.timestamp_ms && lhs.metadata_file == rhs.metadata_file;
61 }
62};
63
/// \brief Represents the metadata for an Iceberg table.
///
/// NOTE(review): this listing does not show every member. The reference
/// section at the end of this page documents additional fields defined at
/// table_metadata.h lines 83, 89, 93, 97, 101, 103, 105, 107 and 119
/// (format_version, last_sequence_number, last_column_id, current_schema_id,
/// default_spec_id, last_partition_id, properties, current_snapshot_id and
/// default_sort_order_id). Consult the real header before relying on the
/// member list below.
72struct ICEBERG_EXPORT TableMetadata {
  /// Format version used when the caller does not pass one; it is the default
  /// argument of Make() below.
73 static constexpr int8_t kDefaultTableFormatVersion = 2;
  /// NOTE(review): presumably the highest format version this library
  /// supports -- confirm against the implementation.
74 static constexpr int8_t kSupportedTableFormatVersion = 3;
  /// NOTE(review): presumably the first format version with row lineage -- confirm.
75 static constexpr int8_t kMinFormatVersionRowLineage = 3;
  /// NOTE(review): presumably the first format version with default values -- confirm.
76 static constexpr int8_t kMinFormatVersionDefaultValues = 3;
  /// Initial sequence number constant (0).
77 static constexpr int64_t kInitialSequenceNumber = 0;
  /// Initial row ID constant (0).
78 static constexpr int64_t kInitialRowId = 0;
79
  /// Map from TypeId to a format version. It is initialized empty here.
  /// NOTE(review): presumably the minimum format version required per type -- confirm.
80 static inline const std::unordered_map<TypeId, int8_t> kMinFormatVersions = {};
81
  /// A UUID that identifies the table.
85 std::string table_uuid;
  /// The table's base location.
87 std::string location;
  /// Timestamp in milliseconds from the unix epoch when the table was last updated.
91 TimePointMs last_updated_ms;
  // The element types below are spelled `iceberg::Schema`, `iceberg::PartitionSpec`,
  // etc. because this struct also declares member functions named Schema(),
  // PartitionSpec(), SortOrder() and Snapshot(), which hide the type names
  // inside the struct.
  /// A list of schemas.
95 std::vector<std::shared_ptr<iceberg::Schema>> schemas;
  /// A list of partition specs.
99 std::vector<std::shared_ptr<iceberg::PartitionSpec>> partition_specs;
  /// A list of valid snapshots.
109 std::vector<std::shared_ptr<iceberg::Snapshot>> snapshots;
  /// A list of SnapshotLogEntry values (timestamp plus snapshot ID).
112 std::vector<SnapshotLogEntry> snapshot_log;
  /// A list of MetadataLogEntry values (timestamp plus metadata file location).
115 std::vector<MetadataLogEntry> metadata_log;
  /// A list of sort orders.
117 std::vector<std::shared_ptr<iceberg::SortOrder>> sort_orders;
  /// A map of snapshot references, keyed by a string.
  /// NOTE(review): the key is presumably the reference name -- confirm.
121 std::unordered_map<std::string, std::shared_ptr<SnapshotRef>> refs;
  /// A list of table statistics.
123 std::vector<std::shared_ptr<struct StatisticsFile>> statistics;
  /// A list of partition statistics.
125 std::vector<std::shared_ptr<struct PartitionStatisticsFile>> partition_statistics;
  /// A long higher than all assigned row IDs.
127 int64_t next_row_id;
128
  /// \brief Factory that builds a TableMetadata from a schema, partition spec,
  /// sort order, location and string properties.
  ///
  /// \param format_version defaults to kDefaultTableFormatVersion. Note that it
  ///        is declared as `int` while the version constants are `int8_t`.
  /// \return the new metadata wrapped in a Result; failure is reported through
  ///         the Result rather than by a null pointer (NOTE(review): the exact
  ///         validation performed is not visible in this header).
129 static Result<std::unique_ptr<TableMetadata>> Make(
130 const iceberg::Schema& schema, const iceberg::PartitionSpec& spec,
131 const iceberg::SortOrder& sort_order, const std::string& location,
132 const std::unordered_map<std::string, std::string>& properties,
133 int format_version = kDefaultTableFormatVersion);
134
  // Lookup accessors. Each returns a shared_ptr wrapped in a Result.
  // NOTE(review): the parameterless overloads presumably return the table's
  // current schema / default spec / default sort order / current snapshot,
  // and the *ById overloads presumably fail when the ID is unknown -- confirm
  // against table_metadata.cc.
137 Result<std::shared_ptr<iceberg::Schema>> Schema() const;
140 Result<std::shared_ptr<iceberg::Schema>> SchemaById(int32_t schema_id) const;
143 Result<std::shared_ptr<iceberg::PartitionSpec>> PartitionSpec() const;
146 Result<std::shared_ptr<iceberg::PartitionSpec>> PartitionSpecById(
147 int32_t spec_id) const;
150 Result<std::shared_ptr<iceberg::SortOrder>> SortOrder() const;
153 Result<std::shared_ptr<iceberg::SortOrder>> SortOrderById(int32_t sort_order_id) const;
156 Result<std::shared_ptr<iceberg::Snapshot>> Snapshot() const;
  // Snapshot IDs are 64-bit, unlike the 32-bit schema/spec/sort-order IDs.
159 Result<std::shared_ptr<iceberg::Snapshot>> SnapshotById(int64_t snapshot_id) const;
  /// NOTE(review): presumably the sequence number to assign next -- confirm.
161 int64_t NextSequenceNumber() const;
162
  /// Equality comparison; declared here and defined out of line.
163 ICEBERG_EXPORT friend bool operator==(const TableMetadata& lhs,
164 const TableMetadata& rhs);
165};
166
167// Cache for table metadata mappings to facilitate fast lookups.
//
// Exposes ID-keyed maps of the schemas, partition specs, sort orders and
// snapshots of one TableMetadata. The metadata is referenced through a raw,
// non-owning pointer; NOTE(review): the caller presumably must keep the
// TableMetadata alive for as long as the cache is used -- confirm.
168class ICEBERG_EXPORT TableMetadataCache {
169 public:
  // Stores the pointer as given; no maps are built in the constructor body.
170 explicit TableMetadataCache(const TableMetadata* metadata) : metadata_(metadata) {}
171
  // Map from a 32-bit ID to a shared pointer of T.
172 template <typename T>
173 using ByIdMap = std::unordered_map<int32_t, std::shared_ptr<T>>;
174 using SchemasMap = ByIdMap<Schema>;
175 using PartitionSpecsMap = ByIdMap<PartitionSpec>;
176 using SortOrdersMap = ByIdMap<SortOrder>;
  // Snapshots are keyed by a 64-bit ID, so they cannot reuse ByIdMap.
177 using SnapshotsMap = std::unordered_map<int64_t, std::shared_ptr<Snapshot>>;
  // The getters hand out const references (wrapped so they can be carried in a
  // Result) rather than copies of the maps.
178 using SchemasMapRef = std::reference_wrapper<const SchemasMap>;
179 using PartitionSpecsMapRef = std::reference_wrapper<const PartitionSpecsMap>;
180 using SortOrdersMapRef = std::reference_wrapper<const SortOrdersMap>;
181 using SnapshotsMapRef = std::reference_wrapper<const SnapshotsMap>;
182
  // Accessors for the ID-keyed maps; each returns a reference wrapped in a Result.
183 Result<SchemasMapRef> GetSchemasById() const;
184 Result<PartitionSpecsMapRef> GetPartitionSpecsById() const;
185 Result<SortOrdersMapRef> GetSortOrdersById() const;
186 Result<SnapshotsMapRef> GetSnapshotsById() const;
187
188 private:
  // Static builders that produce each map from a TableMetadata pointer.
189 static Result<SchemasMap> InitSchemasMap(const TableMetadata* metadata);
190 static Result<PartitionSpecsMap> InitPartitionSpecsMap(const TableMetadata* metadata);
191 static Result<SortOrdersMap> InitSortOrdersMap(const TableMetadata* metadata);
192 static Result<SnapshotsMap> InitSnapshotMap(const TableMetadata* metadata);
193
  // Non-owning pointer to the metadata the maps are derived from.
194 const TableMetadata* metadata_;
  // Lazy is declared in iceberg/util/lazy.h and is parameterized on the Init*
  // function above. NOTE(review): presumably it runs that function on first
  // access and caches the result -- confirm in lazy.h.
195 Lazy<InitSchemasMap> schemas_map_;
196 Lazy<InitPartitionSpecsMap> partition_specs_map_;
197 Lazy<InitSortOrdersMap> sort_orders_map_;
198 Lazy<InitSnapshotMap> snapshot_map_;
199};
200
/// \brief Returns a string form of a SnapshotLogEntry.
/// NOTE(review): the exact format is defined out of line and not visible here.
202ICEBERG_EXPORT std::string ToString(const SnapshotLogEntry& entry);
203
/// \brief Returns a string form of a MetadataLogEntry.
/// NOTE(review): the exact format is defined out of line and not visible here.
205ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
206
/// \brief Builder class for constructing TableMetadata objects.
///
/// Derives from ErrorCollector, described in this page's reference section as
/// the base class for collecting errors in the builder pattern. Every mutator
/// returns `TableMetadataBuilder&` so calls can be chained; Build() produces
/// the final TableMetadata wrapped in a Result.
///
/// Both constructors are private, so instances are obtained through the static
/// factories BuildFromEmpty() and BuildFrom(), which return a unique_ptr.
///
/// NOTE(review): this listing omits some declarations. The reference section
/// documents a `~TableMetadataBuilder() override` destructor, and the comments
/// near the end of the public section refer to copy and move operations of
/// which only the assignment operators are shown. Consult the real header for
/// the full set of special members.
220class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector {
221 public:
  /// \brief Creates a builder that does not start from existing metadata.
  /// \param format_version defaults to TableMetadata::kDefaultTableFormatVersion.
226 static std::unique_ptr<TableMetadataBuilder> BuildFromEmpty(
227 int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
228
  /// \brief Creates a builder from existing metadata.
  /// NOTE(review): `base` is a raw pointer; whether null is accepted and how
  /// long it must stay valid are not visible in this header.
234 static std::unique_ptr<TableMetadataBuilder> BuildFrom(const TableMetadata* base);
235
  /// NOTE(review): presumably replays the contents of `base` as the changes
  /// needed to create a table -- confirm against the implementation.
240 TableMetadataBuilder& ApplyChangesForCreate(const TableMetadata& base);
241
  // --- Metadata file locations ---
246 TableMetadataBuilder& SetMetadataLocation(std::string_view metadata_location);
247
252 TableMetadataBuilder& SetPreviousMetadataLocation(
253 std::string_view previous_metadata_location);
254
  // --- Table UUID: one overload takes no argument, the other an explicit UUID ---
  // NOTE(review): the no-argument overload presumably generates a UUID -- confirm.
259 TableMetadataBuilder& AssignUUID();
260
265 TableMetadataBuilder& AssignUUID(std::string_view uuid);
266
  // --- Format version ---
271 TableMetadataBuilder& UpgradeFormatVersion(int8_t new_format_version);
272
  // --- Schemas: set the current schema by object or by ID, or add one ---
278 TableMetadataBuilder& SetCurrentSchema(const std::shared_ptr<Schema>& schema,
279 int32_t new_last_column_id);
280
285 TableMetadataBuilder& SetCurrentSchema(int32_t schema_id);
286
291 TableMetadataBuilder& AddSchema(const std::shared_ptr<Schema>& schema);
292
  // --- Partition specs: set the default by object or by ID, add, or remove ---
297 TableMetadataBuilder& SetDefaultPartitionSpec(std::shared_ptr<PartitionSpec> spec);
298
303 TableMetadataBuilder& SetDefaultPartitionSpec(int32_t spec_id);
304
309 TableMetadataBuilder& AddPartitionSpec(std::shared_ptr<PartitionSpec> spec);
310
315 TableMetadataBuilder& RemovePartitionSpecs(const std::vector<int32_t>& spec_ids);
316
  // Note: schema IDs are passed as an unordered_set here, whereas
  // RemovePartitionSpecs above takes a vector.
321 TableMetadataBuilder& RemoveSchemas(const std::unordered_set<int32_t>& schema_ids);
322
  // --- Sort orders: set the default by object or by ID, or add one ---
327 TableMetadataBuilder& SetDefaultSortOrder(std::shared_ptr<SortOrder> order);
328
333 TableMetadataBuilder& SetDefaultSortOrder(int32_t order_id);
334
339 TableMetadataBuilder& AddSortOrder(std::shared_ptr<SortOrder> order);
340
  // --- Snapshots and references ---
345 TableMetadataBuilder& AddSnapshot(std::shared_ptr<Snapshot> snapshot);
346
  // The branch's snapshot can be given either by ID or as a Snapshot object.
352 TableMetadataBuilder& SetBranchSnapshot(int64_t snapshot_id, const std::string& branch);
353
359 TableMetadataBuilder& SetBranchSnapshot(std::shared_ptr<Snapshot> snapshot,
360 const std::string& branch);
361
367 TableMetadataBuilder& SetRef(const std::string& name, std::shared_ptr<SnapshotRef> ref);
368
373 TableMetadataBuilder& RemoveRef(const std::string& name);
374
  // Snapshots to remove can be given either as objects or as IDs.
379 TableMetadataBuilder& RemoveSnapshots(
380 const std::vector<std::shared_ptr<Snapshot>>& snapshots_to_remove);
381
386 TableMetadataBuilder& RemoveSnapshots(const std::vector<int64_t>& snapshot_ids);
387
  // NOTE(review): semantics are not visible in this header -- see the implementation.
395 TableMetadataBuilder& SuppressHistoricalSnapshots();
396
  // --- Statistics: set takes a file object; remove is keyed by snapshot ID ---
401 TableMetadataBuilder& SetStatistics(std::shared_ptr<StatisticsFile> statistics_file);
402
407 TableMetadataBuilder& RemoveStatistics(int64_t snapshot_id);
408
413 TableMetadataBuilder& SetPartitionStatistics(
414 const std::shared_ptr<PartitionStatisticsFile>& partition_statistics_file);
415
420 TableMetadataBuilder& RemovePartitionStatistics(int64_t snapshot_id);
421
  // --- Properties: string-to-string entries to set, or keys to remove ---
426 TableMetadataBuilder& SetProperties(
427 const std::unordered_map<std::string, std::string>& updated);
428
433 TableMetadataBuilder& RemoveProperties(const std::unordered_set<std::string>& removed);
434
  // --- Table location ---
439 TableMetadataBuilder& SetLocation(std::string_view location);
440
  // --- Encryption keys: add by object, remove by key ID ---
445 TableMetadataBuilder& AddEncryptionKey(std::shared_ptr<EncryptedKey> key);
446
451 TableMetadataBuilder& RemoveEncryptionKey(std::string_view key_id);
452
  /// \brief Produces the TableMetadata.
  /// \return the metadata wrapped in a Result. NOTE(review): presumably errors
  ///         gathered through ErrorCollector surface here -- confirm.
456 Result<std::unique_ptr<TableMetadata>> Build();
457
  /// The list of TableUpdate objects held by the builder.
  /// NOTE(review): presumably the changes recorded by the mutators above -- confirm.
459 const std::vector<std::unique_ptr<TableUpdate>>& changes() const;
460
  /// The base metadata as a pointer.
  /// NOTE(review): presumably null for builders created by BuildFromEmpty() -- confirm.
462 const TableMetadata* base() const;
463
  /// The metadata in its current state, by const reference.
465 const TableMetadata& current() const;
466
469
470 // Delete copy operations (use BuildFrom to create a new builder)
472 TableMetadataBuilder& operator=(const TableMetadataBuilder&) = delete;
473
474 // Enable move operations
476 TableMetadataBuilder& operator=(TableMetadataBuilder&&) noexcept;
477
478 private:
  // Private constructors mirroring the two public factories.
480 explicit TableMetadataBuilder(int8_t format_version);
481
483 explicit TableMetadataBuilder(const TableMetadata* base);
484
  // Builder state lives in Impl, which is only forward-declared in this header
  // and owned through a unique_ptr.
486 struct Impl;
487 std::unique_ptr<Impl> impl_;
488};
489
/// \brief Codec types for table metadata files: none or gzip.
///
/// Produced by TableMetadataUtil::Codec::FromString / FromFileName and
/// consumed by TableMetadataUtil::Codec::TypeToFileExtension.
491enum class ICEBERG_EXPORT MetadataFileCodecType {
492 kNone,
493 kGzip,
494};
495
/// \brief Utility class for table metadata.
///
/// Groups static helpers for reading and writing TableMetadata through a
/// FileIO (described in this page's reference section as a pluggable module
/// for reading, writing, and deleting files), plus codec helpers in the nested
/// Codec struct.
497struct ICEBERG_EXPORT TableMetadataUtil {
  /// Static helpers and constants relating codec names, codec types and file
  /// name suffixes.
498 struct ICEBERG_EXPORT Codec {
    /// Maps a codec name to a MetadataFileCodecType.
    /// NOTE(review): presumably accepts the kCodecType* names below and fails
    /// for anything else; case handling is not visible here -- confirm.
503 static Result<MetadataFileCodecType> FromString(std::string_view name);
504
    /// Derives the codec type from a metadata file name.
    /// NOTE(review): presumably by matching the suffix constants below -- confirm.
509 static Result<MetadataFileCodecType> FromFileName(std::string_view file_name);
510
    /// Maps a codec name to a file extension; may fail (returns a Result).
514 static Result<std::string> NameToFileExtension(std::string_view codec);
515
    /// Maps a codec type to a file extension; returns a plain string, not a Result.
519 static std::string TypeToFileExtension(MetadataFileCodecType codec);
520
    // File-name suffix constants. Two gzip spellings are defined: the suffix
    // ending in ".gz" and the one with ".gz" before ".metadata.json".
521 static constexpr std::string_view kTableMetadataFileSuffix = ".metadata.json";
522 static constexpr std::string_view kCompGzipTableMetadataFileSuffix =
523 ".metadata.json.gz";
524 static constexpr std::string_view kGzipTableMetadataFileSuffix = ".gz.metadata.json";
525 static constexpr std::string_view kGzipTableMetadataFileExtension = ".gz";
    // Codec names as strings.
526 static constexpr std::string_view kCodecTypeGzip = "GZIP";
527 static constexpr std::string_view kCodecTypeNone = "NONE";
528 };
529
  /// \brief Reads table metadata from `location` using `io`.
  /// \param length optional size; defaults to std::nullopt.
  ///        NOTE(review): presumably the file length in bytes -- confirm.
  /// \return the parsed metadata wrapped in a Result.
536 static Result<std::unique_ptr<TableMetadata>> Read(
537 class FileIO& io, const std::string& location,
538 std::optional<size_t> length = std::nullopt);
539
  /// \brief Writes `metadata` given an optional base and its location.
  /// `metadata` is taken by non-const reference, so this overload may modify it.
  /// \return a string wrapped in a Result.
  ///         NOTE(review): presumably the location of the new metadata file -- confirm.
550 static Result<std::string> Write(FileIO& io, const TableMetadata* base,
551 const std::string& base_metadata_location,
552 TableMetadata& metadata);
553
  /// \brief Deletes metadata files that are no longer needed.
  /// Returns void, so failures are not reported to the caller through the
  /// return value. NOTE(review): presumably removes files present in `base`'s
  /// metadata log but absent from `metadata`'s -- confirm.
563 static void DeleteRemovedMetadataFiles(FileIO& io, const TableMetadata* base,
564 const TableMetadata& metadata);
565
  /// \brief Writes `metadata` to an explicit `location`.
  /// Unlike the overload above, `metadata` is const and only a Status is returned.
571 static Status Write(FileIO& io, const std::string& location,
572 const TableMetadata& metadata);
573
574 private:
  /// Extracts a version number from a metadata location.
  /// NOTE(review): returns a plain int32_t, so a parse failure is presumably
  /// signalled by a sentinel value -- confirm which one.
584 static int32_t ParseVersionFromLocation(std::string_view metadata_location);
585
  /// Builds the path of a new metadata file for `metadata` at `version`.
591 static Result<std::string> NewTableMetadataFilePath(const TableMetadata& metadata,
592 int32_t version);
593};
594
595} // namespace iceberg
596
// std::hash specialization so MetadataLogEntry can be used as a key in
// unordered containers.
597namespace std {
598template <>
599struct hash<iceberg::MetadataLogEntry> {
  // Hashes only metadata_file; timestamp_ms is not mixed in. This stays
  // consistent with MetadataLogEntry's operator== (which compares both fields):
  // equal entries always have equal metadata_file and therefore equal hashes.
  // Entries that differ only in timestamp share a hash value.
600 size_t operator()(const iceberg::MetadataLogEntry& m) const noexcept {
601 return std::hash<std::string>{}(m.metadata_file);
602 }
603};
604
605} // namespace std
Base class for collecting errors in the builder pattern.
Definition error_collector.h:93
Pluggable module for reading, writing, and deleting files.
Definition file_io.h:115
A partition spec for a Table.
Definition partition_spec.h:47
A schema for a Table.
Definition schema.h:49
A sort order for a table.
Definition sort_order.h:40
Definition table_metadata.cc:545
Builder class for constructing TableMetadata objects.
Definition table_metadata.h:220
~TableMetadataBuilder() override
Destructor.
Definition table_metadata.h:168
Table properties for Iceberg tables.
Definition table_properties.h:37
ICEBERG_EXPORT const std::shared_ptr< UuidType > & uuid()
Return a UuidType instance.
STL namespace.
Represents a metadata log entry.
Definition table_metadata.h:53
std::string metadata_file
Metadata file location.
Definition table_metadata.h:57
TimePointMs timestamp_ms
The timestamp in milliseconds of the change.
Definition table_metadata.h:55
Represents a snapshot log entry.
Definition table_metadata.h:41
TimePointMs timestamp_ms
The timestamp in milliseconds of the change.
Definition table_metadata.h:43
int64_t snapshot_id
ID of the snapshot.
Definition table_metadata.h:45
A snapshot of the data in a table at a point in time.
Definition snapshot.h:389
Definition table_metadata.h:498
Utility class for table metadata.
Definition table_metadata.h:497
Represents the metadata for an Iceberg table.
Definition table_metadata.h:72
int64_t current_snapshot_id
ID of the current table snapshot.
Definition table_metadata.h:107
TimePointMs last_updated_ms
Timestamp in milliseconds from the unix epoch when the table was last updated.
Definition table_metadata.h:91
int32_t last_partition_id
The highest assigned partition field ID across all partition specs for the table.
Definition table_metadata.h:103
std::vector< std::shared_ptr< iceberg::PartitionSpec > > partition_specs
A list of partition specs.
Definition table_metadata.h:99
std::vector< MetadataLogEntry > metadata_log
Definition table_metadata.h:115
int32_t last_column_id
The highest assigned column ID for the table.
Definition table_metadata.h:93
int32_t default_spec_id
ID of the current partition spec that writers should use by default.
Definition table_metadata.h:101
std::unordered_map< std::string, std::shared_ptr< SnapshotRef > > refs
A map of snapshot references.
Definition table_metadata.h:121
TableProperties properties
A string to string map of table properties.
Definition table_metadata.h:105
std::vector< std::shared_ptr< iceberg::Snapshot > > snapshots
A list of valid snapshots.
Definition table_metadata.h:109
std::vector< std::shared_ptr< iceberg::SortOrder > > sort_orders
A list of sort orders.
Definition table_metadata.h:117
int32_t default_sort_order_id
Default sort order id of the table.
Definition table_metadata.h:119
int64_t last_sequence_number
The table's highest assigned sequence number.
Definition table_metadata.h:89
std::vector< std::shared_ptr< struct PartitionStatisticsFile > > partition_statistics
A list of partition statistics.
Definition table_metadata.h:125
int64_t next_row_id
A long higher than all assigned row IDs.
Definition table_metadata.h:127
std::string location
The table's base location.
Definition table_metadata.h:87
std::vector< SnapshotLogEntry > snapshot_log
Definition table_metadata.h:112
std::vector< std::shared_ptr< iceberg::Schema > > schemas
A list of schemas.
Definition table_metadata.h:95
std::vector< std::shared_ptr< struct StatisticsFile > > statistics
A list of table statistics.
Definition table_metadata.h:123
int8_t format_version
An integer version number for the format.
Definition table_metadata.h:83
int32_t current_schema_id
ID of the table's current schema.
Definition table_metadata.h:97
std::string table_uuid
A UUID that identifies the table.
Definition table_metadata.h:85