27#include <unordered_map>
30#include "iceberg/iceberg_export.h"
32#include "iceberg/result.h"
35#include "iceberg/util/timepoint.h"
40enum class SnapshotRefType {
50ICEBERG_EXPORT
constexpr std::string_view ToString(SnapshotRefType type)
noexcept {
52 case SnapshotRefType::kBranch:
54 case SnapshotRefType::kTag:
60ICEBERG_EXPORT
constexpr Result<SnapshotRefType> SnapshotRefTypeFromString(
61 std::string_view str)
noexcept {
62 if (str ==
"branch")
return SnapshotRefType::kBranch;
63 if (str ==
"tag")
return SnapshotRefType::kTag;
64 return InvalidArgument(
"Invalid snapshot reference type: {}", str);
69 static constexpr std::string_view kMainBranch =
"main";
87 return lhs.Equals(rhs);
92 bool Equals(
const Branch& other)
const;
95 struct ICEBERG_EXPORT
Tag {
106 bool Equals(
const Tag& other)
const;
114 SnapshotRefType type() const noexcept;
116 std::optional<int64_t> max_ref_age_ms() const noexcept;
127 int64_t snapshot_id,
std::optional<int32_t> min_snapshots_to_keep =
std::nullopt,
128 std::optional<int64_t> max_snapshot_age_ms =
std::nullopt,
129 std::optional<int64_t> max_ref_age_ms =
std::nullopt);
138 int64_t snapshot_id,
std::optional<int64_t> max_ref_age_ms =
std::nullopt);
146 std::optional<int64_t> new_snapshot_id =
std::nullopt) const;
149 Status Validate() const;
153 return lhs.Equals(rhs);
164 inline static const std::string kOperation =
"operation";
166 inline static const std::string kFirstRowId =
"first-row-id";
168 inline static const std::string kAddedRows =
"added-rows";
173 inline static const std::string kAddedDataFiles =
"added-data-files";
175 inline static const std::string kDeletedDataFiles =
"deleted-data-files";
177 inline static const std::string kTotalDataFiles =
"total-data-files";
180 inline static const std::string kAddedDeleteFiles =
"added-delete-files";
182 inline static const std::string kAddedEqDeleteFiles =
"added-equality-delete-files";
184 inline static const std::string kRemovedEqDeleteFiles =
"removed-equality-delete-files";
186 inline static const std::string kAddedPosDeleteFiles =
"added-position-delete-files";
188 inline static const std::string kRemovedPosDeleteFiles =
189 "removed-position-delete-files";
191 inline static const std::string kAddedDVs =
"added-dvs";
193 inline static const std::string kRemovedDVs =
"removed-dvs";
196 inline static const std::string kRemovedDeleteFiles =
"removed-delete-files";
199 inline static const std::string kTotalDeleteFiles =
"total-delete-files";
201 inline static const std::string kAddedRecords =
"added-records";
203 inline static const std::string kDeletedRecords =
"deleted-records";
205 inline static const std::string kTotalRecords =
"total-records";
207 inline static const std::string kAddedFileSize =
"added-files-size";
209 inline static const std::string kRemovedFileSize =
"removed-files-size";
211 inline static const std::string kTotalFileSize =
"total-files-size";
213 inline static const std::string kAddedPosDeletes =
"added-position-deletes";
215 inline static const std::string kRemovedPosDeletes =
"removed-position-deletes";
217 inline static const std::string kTotalPosDeletes =
"total-position-deletes";
219 inline static const std::string kAddedEqDeletes =
"added-equality-deletes";
221 inline static const std::string kRemovedEqDeletes =
"removed-equality-deletes";
223 inline static const std::string kTotalEqDeletes =
"total-equality-deletes";
226 inline static const std::string kDeletedDuplicatedFiles =
"deleted-duplicate-files";
228 inline static const std::string kChangedPartitionCountProp =
"changed-partition-count";
230 inline static const std::string kManifestsCreated =
"manifests-created";
232 inline static const std::string kManifestsKept =
"manifests-kept";
234 inline static const std::string kManifestsReplaced =
"manifests-replaced";
236 inline static const std::string kEntriesProcessed =
"entries-processed";
238 inline static const std::string kChangedPartitionPrefix =
"partitions.";
240 inline static const std::string kPartitionSummaryProp =
"partition-summaries-included";
245 inline static const std::string kWAPId =
"wap.id";
247 inline static const std::string kPublishedWAPId =
"published-wap-id";
249 inline static const std::string kSourceSnapshotId =
"source-snapshot-id";
251 inline static const std::string kEngineName =
"engine-name";
253 inline static const std::string kEngineVersion =
"engine-version";
263 class UpdateMetrics {
266 void AddTo(std::unordered_map<std::string, std::string>& builder)
const;
267 void AddedFile(
const DataFile& file);
268 void RemovedFile(
const DataFile& file);
270 void Merge(
const UpdateMetrics& other);
273 int64_t added_size_{0};
274 int64_t removed_size_{0};
275 int32_t added_files_{0};
276 int32_t removed_files_{0};
277 int32_t added_eq_delete_files_{0};
278 int32_t removed_eq_delete_files_{0};
279 int32_t added_pos_delete_files_{0};
280 int32_t removed_pos_delete_files_{0};
281 int32_t added_dvs_{0};
282 int32_t removed_dvs_{0};
283 int32_t added_delete_files_{0};
284 int32_t removed_delete_files_{0};
285 int64_t added_records_{0};
286 int64_t deleted_records_{0};
287 int64_t added_pos_deletes_{0};
288 int64_t removed_pos_deletes_{0};
289 int64_t added_eq_deletes_{0};
290 int64_t removed_eq_deletes_{0};
291 bool trust_size_and_delete_counts_{
true};
309 void SetPartitionSummaryLimit(int32_t max);
314 void IncrementDuplicateDeletes(int32_t increment = 1);
339 void Set(
const std::string& property,
const std::string& value);
349 std::unordered_map<std::string, std::string> Build()
const;
356 std::unordered_map<std::string, std::string> properties_;
357 std::unordered_map<std::string, UpdateMetrics> partition_metrics_;
358 UpdateMetrics metrics_;
359 int32_t max_changed_partitions_for_summaries_{0};
360 int64_t deleted_duplicate_files_{0};
361 bool trust_partition_metrics_{
true};
371 inline static const std::string kAppend =
"append";
374 inline static const std::string kReplace =
"replace";
377 inline static const std::string kOverwrite =
"overwrite";
380 inline static const std::string kDelete =
"delete";
403 std::unordered_map<std::string, std::string>
summary;
408 static Result<std::unique_ptr<Snapshot>> Make(
409 int64_t sequence_number, int64_t snapshot_id,
410 std::optional<int64_t> parent_snapshot_id, TimePointMs timestamp_ms,
411 std::string operation, std::unordered_map<std::string, std::string> summary,
412 std::optional<int32_t> schema_id, std::string manifest_list,
413 std::optional<int64_t> first_row_id = std::nullopt,
414 std::optional<int64_t> added_rows = std::nullopt);
421 std::optional<std::string_view> Operation()
const;
432 Result<std::optional<int64_t>> FirstRowId()
const;
444 Result<std::optional<int64_t>> AddedRows()
const;
448 return lhs.Equals(rhs);
453 bool Equals(
const Snapshot& other)
const;
471 Result<std::span<ManifestFile>> Manifests(std::shared_ptr<FileIO> file_io)
const;
477 Result<std::span<ManifestFile>> DataManifests(std::shared_ptr<FileIO> file_io)
const;
483 Result<std::span<ManifestFile>> DeleteManifests(std::shared_ptr<FileIO> file_io)
const;
490 using ManifestsCache = std::pair<std::vector<ManifestFile>,
size_t>;
496 static Result<ManifestsCache> InitManifestsCache(
const Snapshot* snapshot,
497 std::shared_ptr<FileIO> file_io);
503 Lazy<InitManifestsCache> manifests_cache_;
A partition spec for a Table.
Definition partition_spec.h:47
Maintains statistics for each partition field and produces the partition field summaries.
Definition partition_summary_internal.h:51
A snapshot with cached manifest loading capabilities.
Definition snapshot.h:459
const Snapshot & snapshot() const
Get the underlying Snapshot reference.
Definition snapshot.h:464
Helper class for building snapshot summaries.
Definition snapshot.h:260
DataFile carries data file path, partition tuple, metrics, ...
Definition manifest_entry.h:62
Data operations that produce snapshots.
Definition snapshot.h:369
Entry in a manifest list.
Definition manifest_list.h:85
std::optional< int32_t > min_snapshots_to_keep
Definition snapshot.h:75
std::optional< int64_t > max_snapshot_age_ms
Definition snapshot.h:79
std::optional< int64_t > max_ref_age_ms
Definition snapshot.h:83
friend bool operator==(const Branch &lhs, const Branch &rhs)
Compare two branches for equality.
Definition snapshot.h:86
std::optional< int64_t > max_ref_age_ms
Definition snapshot.h:99
friend bool operator==(const Tag &lhs, const Tag &rhs)
Compare two tags for equality.
Definition snapshot.h:102
A reference to a snapshot, either a branch or a tag.
Definition snapshot.h:68
int64_t snapshot_id
A reference's snapshot ID. The tagged snapshot or latest snapshot of a branch.
Definition snapshot.h:110
std::variant< Branch, Tag > retention
Snapshot retention policy.
Definition snapshot.h:112
Optional Snapshot Summary Fields.
Definition snapshot.h:162
A snapshot of the data in a table at a point in time.
Definition snapshot.h:389
friend bool operator==(const Snapshot &lhs, const Snapshot &rhs)
Compare two snapshots for equality.
Definition snapshot.h:447
int64_t snapshot_id
A unique long ID.
Definition snapshot.h:391
int64_t sequence_number
A monotonically increasing long that tracks the order of changes to a table.
Definition snapshot.h:395
std::optional< int32_t > schema_id
ID of the table's current schema when the snapshot was created.
Definition snapshot.h:405
TimePointMs timestamp_ms
Definition snapshot.h:398
std::optional< int64_t > parent_snapshot_id
The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
Definition snapshot.h:393
std::string manifest_list
Definition snapshot.h:401
std::unordered_map< std::string, std::string > summary
A string map that summarizes the snapshot changes, including the operation.
Definition snapshot.h:403