iceberg-cpp
Loading...
Searching...
No Matches
manifest_writer.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <memory>
26#include <string>
27#include <vector>
28
29#include "iceberg/file_writer.h"
30#include "iceberg/iceberg_export.h"
32#include "iceberg/metrics.h"
33#include "iceberg/result.h"
34#include "iceberg/type_fwd.h"
35
36namespace iceberg {
37
// Write manifest entries to a manifest file.
//
// Instances are obtained through the static MakeWriter() factory; the
// constructor is private. Every mutating operation reports its outcome through
// a Status return value.
39class ICEBERG_EXPORT ManifestWriter {
40 public:
42
// Write a single, caller-built manifest entry.
48 Status WriteEntry(const ManifestEntry& entry);
49
// Write an entry for `file`; judging by the name it is recorded as an added
// file (confirm in the implementation). `data_sequence_number` is optional and
// defaults to std::nullopt.
58 Status WriteAddedEntry(std::shared_ptr<DataFile> file,
59 std::optional<int64_t> data_sequence_number = std::nullopt);
// Overload taking an already-built entry instead of a DataFile.
62 Status WriteAddedEntry(const ManifestEntry& entry);
63
// Write an entry for `file` together with the snapshot id and data sequence
// number supplied by the caller; presumably recorded as an existing file
// (confirm in the implementation). `file_sequence_number` is optional and
// defaults to std::nullopt.
74 Status WriteExistingEntry(std::shared_ptr<DataFile> file, int64_t file_snapshot_id,
75 int64_t data_sequence_number,
76 std::optional<int64_t> file_sequence_number = std::nullopt);
// Overload taking an already-built entry instead of a DataFile.
79 Status WriteExistingEntry(const ManifestEntry& entry);
80
// Write an entry for `file` with the given data sequence number; presumably
// recorded as a deleted file (confirm in the implementation).
// `file_sequence_number` is optional and defaults to std::nullopt.
91 Status WriteDeletedEntry(std::shared_ptr<DataFile> file, int64_t data_sequence_number,
92 std::optional<int64_t> file_sequence_number = std::nullopt);
// Overload taking an already-built entry instead of a DataFile.
95 Status WriteDeletedEntry(const ManifestEntry& entry);
96
// Accepts a whole vector of entries in one call.
// NOTE(review): presumably each entry is written in order — confirm.
100 Status AddAll(const std::vector<ManifestEntry>& entries);
101
// Close the writer.
// NOTE(review): presumably flushes pending entries and sets closed_ — confirm.
103 Status Close();
104
// The type of files tracked by the manifest (data files or delete files).
106 ManifestContent content() const;
107
// Metrics for this writer, wrapped in Result so an error can be reported.
110 Result<Metrics> metrics() const;
111
// A length value as int64_t, wrapped in Result so an error can be reported.
// NOTE(review): presumably the byte length of the written manifest — confirm.
114 Result<int64_t> length() const;
115
// Produce a ManifestFile (an entry in a manifest list) for this manifest.
// NOTE(review): presumably populated from the counters kept below — confirm.
118 Result<ManifestFile> ToManifestFile() const;
119
// Factory: create a writer for the manifest at `manifest_location`.
// `content` defaults to ManifestContent::kData and `first_row_id` defaults to
// std::nullopt; `snapshot_id` is optional. Returns the new writer or an error
// through Result.
132 static Result<std::unique_ptr<ManifestWriter>> MakeWriter(
133 int8_t format_version, std::optional<int64_t> snapshot_id,
134 std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
135 std::shared_ptr<PartitionSpec> partition_spec,
136 std::shared_ptr<Schema> current_schema,
137 ManifestContent content = ManifestContent::kData,
138 std::optional<int64_t> first_row_id = std::nullopt);
139
140 private:
141 // Private constructor for internal use only, use the static Make*Writer methods
142 // instead.
143 ManifestWriter(std::unique_ptr<Writer> writer,
144 std::unique_ptr<class ManifestEntryAdapter> adapter,
145 std::string_view manifest_location, std::optional<int64_t> first_row_id);
146
// Internal check of a data file; returns a Status.
// NOTE(review): the specific conditions checked are not visible here.
147 Status CheckDataFile(const DataFile& file) const;
148
// NOTE(review): presumably the number of entries buffered per write batch —
// confirm in the implementation.
149 static constexpr int64_t kBatchSize = 1024;
// Owned collaborators: the underlying file writer and the entry adapter.
150 std::unique_ptr<Writer> writer_;
151 std::unique_ptr<class ManifestEntryAdapter> adapter_;
// Starts out false.
152 bool closed_{false};
153 std::string manifest_location_;
154 std::optional<int64_t> first_row_id_;
155
// Running file and row counters for added / existing / deleted entries, all
// starting at zero.
156 int32_t add_files_count_{0};
157 int32_t existing_files_count_{0};
158 int32_t delete_files_count_{0};
159 int64_t add_rows_count_{0L};
160 int64_t existing_rows_count_{0L};
161 int64_t delete_rows_count_{0L};
// Empty until a sequence number has been recorded.
162 std::optional<int64_t> min_sequence_number_{std::nullopt};
163 std::unique_ptr<PartitionSummary> partition_summary_;
164};
165
// Write manifest files to a manifest list file.
//
// Instances are obtained through the static MakeWriter() factory; the
// constructor is private. Every mutating operation reports its outcome through
// a Status return value.
167class ICEBERG_EXPORT ManifestListWriter {
168 public:
170
// Add one manifest file (an entry in a manifest list).
174 Status Add(const ManifestFile& file);
175
// Accepts a whole vector of manifest files in one call.
// NOTE(review): presumably each one is added in order — confirm.
179 Status AddAll(const std::vector<ManifestFile>& files);
180
// Close the writer.
// NOTE(review): presumably flushes pending entries — confirm.
182 Status Close();
183
// The next row id, if there is one; the result is optional and may be empty.
185 std::optional<int64_t> next_row_id() const;
186
// Factory: create a writer for the manifest list at `manifest_list_location`.
// `parent_snapshot_id` is optional; `sequence_number` and `first_row_id` are
// optional and default to std::nullopt. Returns the new writer or an error
// through Result.
198 static Result<std::unique_ptr<ManifestListWriter>> MakeWriter(
199 int8_t format_version, int64_t snapshot_id,
200 std::optional<int64_t> parent_snapshot_id, std::string_view manifest_list_location,
201 std::shared_ptr<FileIO> file_io,
202 std::optional<int64_t> sequence_number = std::nullopt,
203 std::optional<int64_t> first_row_id = std::nullopt);
204
205 private:
206 // Private constructor for internal use only, use the static Make*Writer methods
207 // instead.
208 ManifestListWriter(std::unique_ptr<Writer> writer,
209 std::unique_ptr<class ManifestFileAdapter> adapter);
210
// NOTE(review): presumably the number of manifest files buffered per write
// batch — confirm in the implementation.
211 static constexpr int64_t kBatchSize = 1024;
// Owned collaborators: the underlying file writer and the manifest-file adapter.
212 std::unique_ptr<Writer> writer_;
213 std::unique_ptr<class ManifestFileAdapter> adapter_;
214};
215
216} // namespace iceberg
Write manifest files to a manifest list file.
Definition manifest_writer.h:167
Write manifest entries to a manifest file.
Definition manifest_writer.h:39
ManifestContent
The type of files tracked by the manifest, either data or delete files; 0 for all v1 manifests.
Definition manifest_list.h:77
DataFile carries data file path, partition tuple, metrics, ...
Definition manifest_entry.h:62
A manifest is an immutable Avro file that lists data files or delete files, along with each file's partition data tuple, metrics, and tracking information.
Definition manifest_entry.h:307
Entry in a manifest list.
Definition manifest_list.h:85