iceberg-cpp
Loading...
Searching...
No Matches
snapshot_update.h
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
22#include <functional>
23#include <memory>
24#include <optional>
25#include <span>
26#include <string>
27#include <unordered_map>
28#include <unordered_set>
29#include <vector>
30
31#include "iceberg/iceberg_export.h"
32#include "iceberg/result.h"
33#include "iceberg/snapshot.h"
34#include "iceberg/type_fwd.h"
36
37namespace iceberg {
38
43class ICEBERG_EXPORT SnapshotUpdate : public PendingUpdate {
44 public:
46 struct ApplyResult {
47 std::shared_ptr<Snapshot> snapshot;
48 std::string target_branch;
49 bool stage_only = false;
50 };
51
52 ~SnapshotUpdate() override;
53
54 Kind kind() const override { return Kind::kUpdateSnapshot; }
55 bool IsRetryable() const override { return true; }
56
62 auto& DeleteWith(this auto& self,
63 std::function<Status(const std::string&)> delete_func) {
64 if (self.delete_func_) {
65 return self.AddError(ErrorKind::kInvalidArgument,
66 "Cannot set delete callback more than once");
67 }
68 self.delete_func_ = std::move(delete_func);
69 return self;
70 }
71
75 auto& StageOnly(this auto& self) {
76 self.stage_only_ = true;
77 return self;
78 }
79
84 auto& SetTargetBranch(this auto& self, const std::string& branch) {
85 if (branch.empty()) [[unlikely]] {
86 return self.AddError(ErrorKind::kInvalidArgument, "Branch name cannot be empty");
87 }
88
89 if (auto ref_it = self.base().refs.find(branch); ref_it != self.base().refs.end()) {
90 if (ref_it->second->type() != SnapshotRefType::kBranch) {
91 return self.AddError(ErrorKind::kInvalidArgument,
92 "{} is a tag, not a branch. Tags cannot be targets for "
93 "producing snapshots",
94 branch);
95 }
96 }
97
98 self.target_branch_ = branch;
99 return self;
100 }
101
107 auto& Set(this auto& self, const std::string& property, const std::string& value) {
108 self.summary_.Set(property, value);
109 return self;
110 }
111
119 Result<ApplyResult> Apply();
120
122 Status Finalize(Result<const TableMetadata*> commit_result) override;
123
124 protected:
125 explicit SnapshotUpdate(std::shared_ptr<TransactionContext> ctx);
126
133 Result<std::vector<ManifestFile>> WriteDataManifests(
134 std::span<const std::shared_ptr<DataFile>> files,
135 const std::shared_ptr<PartitionSpec>& spec,
136 std::optional<int64_t> data_sequence_number = std::nullopt);
137
143 Result<std::vector<ManifestFile>> WriteDeleteManifests(
144 std::span<const std::shared_ptr<DataFile>> files,
145 const std::shared_ptr<PartitionSpec>& spec);
146
147 const std::string& target_branch() const { return target_branch_; }
148 bool can_inherit_snapshot_id() const { return can_inherit_snapshot_id_; }
149 const std::string& commit_uuid() const { return commit_uuid_; }
150 int32_t manifest_count() const { return manifest_count_; }
151 int32_t attempt() const { return attempt_; }
152 int64_t target_manifest_size_bytes() const { return target_manifest_size_bytes_; }
153
164 virtual void CleanUncommitted(const std::unordered_set<std::string>& committed) = 0;
165
169 virtual std::string operation() = 0;
170
177 virtual Status Validate(const TableMetadata& current_metadata,
178 const std::shared_ptr<Snapshot>& snapshot) {
179 return {};
180 };
181
187 virtual Result<std::vector<ManifestFile>> Apply(
188 const TableMetadata& metadata_to_update,
189 const std::shared_ptr<Snapshot>& snapshot) = 0;
190
194 virtual std::unordered_map<std::string, std::string> Summary() = 0;
195
199 virtual bool CleanupAfterCommit() const { return true; }
200
202 int64_t SnapshotId();
203
208 Status DeleteFile(const std::string& path);
209
210 std::string ManifestPath();
211 std::string ManifestListPath();
212 SnapshotSummaryBuilder& summary_builder() { return summary_; }
213
214 private:
216 Result<std::unordered_map<std::string, std::string>> ComputeSummary(
217 const TableMetadata& previous);
218
220 void CleanAll();
221
222 protected:
223 SnapshotSummaryBuilder summary_;
224
225 private:
226 const bool can_inherit_snapshot_id_{true};
227 const std::string commit_uuid_;
228 int32_t manifest_count_{0};
229 int32_t attempt_{0};
230 std::vector<std::string> manifest_lists_;
231 const int64_t target_manifest_size_bytes_;
232 std::optional<int64_t> snapshot_id_;
233 bool stage_only_{false};
234 std::function<Status(const std::string&)> delete_func_;
235 std::string target_branch_{SnapshotRef::kMainBranch};
236 std::shared_ptr<Snapshot> staged_snapshot_;
237};
238
239} // namespace iceberg
Base class for all kinds of table metadata updates.
Definition pending_update.h:41
Helper class for building snapshot summaries.
Definition snapshot.h:260
Base class for operations that produce snapshots.
Definition snapshot_update.h:43
virtual Result< std::vector< ManifestFile > > Apply(const TableMetadata &metadata_to_update, const std::shared_ptr< Snapshot > &snapshot)=0
Apply the update's changes to the given metadata and snapshot.
virtual std::unordered_map< std::string, std::string > Summary()=0
Get the summary map for this operation.
auto & StageOnly(this auto &self)
Stage a snapshot in table metadata without updating the current snapshot id.
Definition snapshot_update.h:75
auto & SetTargetBranch(this auto &self, const std::string &branch)
Perform operations on a particular branch.
Definition snapshot_update.h:84
virtual void CleanUncommitted(const std::unordered_set< std::string > &committed)=0
Clean up any uncommitted manifests that were created.
virtual std::string operation()=0
A string that describes the action that produced the new snapshot.
bool IsRetryable() const override
Whether this update can be retried after a commit conflict.
Definition snapshot_update.h:55
virtual Status Validate(const TableMetadata &current_metadata, const std::shared_ptr< Snapshot > &snapshot)
Validate the current metadata.
Definition snapshot_update.h:177
Kind kind() const override
Return the kind of this pending update.
Definition snapshot_update.h:54
auto & Set(this auto &self, const std::string &property, const std::string &value)
Set a summary property.
Definition snapshot_update.h:107
virtual bool CleanupAfterCommit() const
Check if cleanup should happen after commit.
Definition snapshot_update.h:199
auto & DeleteWith(this auto &self, std::function< Status(const std::string &)> delete_func)
Set a callback to delete files instead of the table's default.
Definition snapshot_update.h:62
Result of applying a snapshot update.
Definition snapshot_update.h:46
Represents the metadata for an Iceberg table.
Definition table_metadata.h:72