iceberg-cpp
Loading...
Searching...
No Matches
manifest_list.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
23
24#include <cstdint>
25#include <optional>
26#include <string>
27#include <string_view>
28#include <utility>
29
30#include "iceberg/constants.h"
31#include "iceberg/iceberg_export.h"
33#include "iceberg/result.h"
36#include "iceberg/type.h"
37
38namespace iceberg {
39
43struct ICEBERG_EXPORT PartitionFieldSummary {
46 bool contains_null = true;
49 std::optional<bool> contains_nan;
53 std::optional<std::vector<uint8_t>> lower_bound;
57 std::optional<std::vector<uint8_t>> upper_bound;
58
59 inline static const SchemaField kContainsNull =
60 SchemaField::MakeRequired(509, "contains_null", iceberg::boolean(),
61 "True if any file has a null partition value");
62 inline static const SchemaField kContainsNaN =
63 SchemaField::MakeOptional(518, "contains_nan", iceberg::boolean(),
64 "True if any file has a nan partition value");
65 inline static const SchemaField kLowerBound = SchemaField::MakeOptional(
66 510, "lower_bound", iceberg::binary(), "Partition lower bound for all files");
67 inline static const SchemaField kUpperBound = SchemaField::MakeOptional(
68 511, "upper_bound", iceberg::binary(), "Partition upper bound for all files");
69
70 bool operator==(const PartitionFieldSummary& other) const = default;
71
72 static const std::shared_ptr<StructType>& Type();
73};
74
/// \brief The type of files tracked by the manifest, either data or delete files;
/// 0 for all v1 manifests.
///
/// The numeric values are spelled out explicitly because they are the codes used by the
/// manifest list "content" field (0=data, 1=deletes).
enum class ManifestContent {
  /// The manifest content is data.
  kData = 0,

  /// The manifest content is deletes.
  kDeletes = 1,
};
83
85struct ICEBERG_EXPORT ManifestFile {
88 std::string manifest_path;
91 int64_t manifest_length = 0;
95 int32_t partition_spec_id = PartitionSpec::kInitialSpecId;
99 ManifestContent content = ManifestContent::kData;
103 int64_t sequence_number = TableMetadata::kInitialSequenceNumber;
107 int64_t min_sequence_number = TableMetadata::kInitialSequenceNumber;
110 int64_t added_snapshot_id = kInvalidSnapshotId;
114 std::optional<int32_t> added_files_count;
118 std::optional<int32_t> existing_files_count;
122 std::optional<int32_t> deleted_files_count;
126 std::optional<int64_t> added_rows_count;
130 std::optional<int64_t> existing_rows_count;
134 std::optional<int64_t> deleted_rows_count;
139 std::vector<PartitionFieldSummary> partitions;
142 std::vector<uint8_t> key_metadata;
145 std::optional<int64_t> first_row_id;
146
148 bool has_added_files() const { return added_files_count.value_or(1) > 0; }
149
151 bool has_existing_files() const { return existing_files_count.value_or(1) > 0; }
152
154 bool has_deleted_files() const { return deleted_files_count.value_or(1) > 0; }
155
156 static constexpr int32_t kManifestPathFieldId = 500;
157 inline static const SchemaField kManifestPath = SchemaField::MakeRequired(
158 kManifestPathFieldId, "manifest_path", string(), "Location URI with FS scheme");
159
160 static constexpr int32_t kManifestLengthFieldId = 501;
161 inline static const SchemaField kManifestLength = SchemaField::MakeRequired(
162 kManifestLengthFieldId, "manifest_length", int64(), "Total file size in bytes");
163
164 static constexpr int32_t kPartitionSpecIdFieldId = 502;
165 inline static const SchemaField kPartitionSpecId = SchemaField::MakeRequired(
166 kPartitionSpecIdFieldId, "partition_spec_id", int32(), "Spec ID used to write");
167
168 static constexpr int32_t kContentFieldId = 517;
169 inline static const SchemaField kContent = SchemaField::MakeOptional(
170 kContentFieldId, "content", int32(), "Contents of the manifest: 0=data, 1=deletes");
171
172 static constexpr int32_t kSequenceNumberFieldId = 515;
173 inline static const SchemaField kSequenceNumber =
174 SchemaField::MakeOptional(kSequenceNumberFieldId, "sequence_number", int64(),
175 "Sequence number when the manifest was added");
176
177 static constexpr int32_t kMinSequenceNumberFieldId = 516;
178 inline static const SchemaField kMinSequenceNumber =
179 SchemaField::MakeOptional(kMinSequenceNumberFieldId, "min_sequence_number", int64(),
180 "Lowest sequence number in the manifest");
181
182 static constexpr int32_t kAddedSnapshotIdFieldId = 503;
183 inline static const SchemaField kAddedSnapshotId =
184 SchemaField::MakeRequired(kAddedSnapshotIdFieldId, "added_snapshot_id", int64(),
185 "Snapshot ID that added the manifest");
186
187 static constexpr int32_t kAddedFilesCountFieldId = 504;
188 inline static const SchemaField kAddedFilesCount = SchemaField::MakeOptional(
189 kAddedFilesCountFieldId, "added_files_count", int32(), "Added entry count");
190
191 static constexpr int32_t kExistingFilesCountFieldId = 505;
192 inline static const SchemaField kExistingFilesCount =
193 SchemaField::MakeOptional(kExistingFilesCountFieldId, "existing_files_count",
194 int32(), "Existing entry count");
195
196 static constexpr int32_t kDeletedFilesCountFieldId = 506;
197 inline static const SchemaField kDeletedFilesCount = SchemaField::MakeOptional(
198 kDeletedFilesCountFieldId, "deleted_files_count", int32(), "Deleted entry count");
199
200 static constexpr int32_t kAddedRowsCountFieldId = 512;
201 inline static const SchemaField kAddedRowsCount = SchemaField::MakeOptional(
202 kAddedRowsCountFieldId, "added_rows_count", int64(), "Added rows count");
203
204 static constexpr int32_t kExistingRowsCountFieldId = 513;
205 inline static const SchemaField kExistingRowsCount = SchemaField::MakeOptional(
206 kExistingRowsCountFieldId, "existing_rows_count", int64(), "Existing rows count");
207
208 static constexpr int32_t kDeletedRowsCountFieldId = 514;
209 inline static const SchemaField kDeletedRowsCount = SchemaField::MakeOptional(
210 kDeletedRowsCountFieldId, "deleted_rows_count", int64(), "Deleted rows count");
211
212 static constexpr int32_t kPartitionSummaryFieldId = 507;
213 inline static const SchemaField kPartitions = SchemaField::MakeOptional(
214 kPartitionSummaryFieldId, "partitions",
215 list(SchemaField::MakeRequired(508, std::string(ListType::kElementName),
216 struct_({
217 PartitionFieldSummary::kContainsNull,
218 PartitionFieldSummary::kContainsNaN,
219 PartitionFieldSummary::kLowerBound,
220 PartitionFieldSummary::kUpperBound,
221 }))),
222 "Summary for each partition");
223
224 static constexpr int32_t kKeyMetadataFieldId = 519;
225 inline static const SchemaField kKeyMetadata = SchemaField::MakeOptional(
226 kKeyMetadataFieldId, "key_metadata", binary(), "Encryption key metadata blob");
227
228 static constexpr int32_t kFirstRowIdFieldId = 520;
229 inline static const SchemaField kFirstRowId = SchemaField::MakeOptional(
230 kFirstRowIdFieldId, "first_row_id", int64(),
231 "Starting row ID to assign to new rows in ADDED data files");
232
233 bool operator==(const ManifestFile& other) const {
234 return manifest_path == other.manifest_path;
235 }
236
237 static const std::shared_ptr<StructType>& Type();
238};
239
252struct ICEBERG_EXPORT ManifestList {
254 std::vector<ManifestFile> entries;
255};
256
258ICEBERG_EXPORT inline constexpr std::string_view ToString(ManifestContent type) noexcept {
259 switch (type) {
260 case ManifestContent::kData:
261 return "data";
262 case ManifestContent::kDeletes:
263 return "deletes";
264 }
265 std::unreachable();
266}
267
269ICEBERG_EXPORT inline constexpr Result<ManifestContent> ManifestContentFromString(
270 std::string_view str) noexcept {
271 if (str == "data") return ManifestContent::kData;
272 if (str == "deletes") return ManifestContent::kDeletes;
273 return InvalidArgument("Invalid manifest content type: {}", str);
274}
275
276} // namespace iceberg
277
278namespace std {
279template <>
280struct hash<iceberg::ManifestFile> {
281 size_t operator()(const iceberg::ManifestFile& manifest_file) const {
282 return std::hash<std::string>{}(manifest_file.manifest_path);
283 }
284};
285} // namespace std
A type combined with a name.
Definition schema_field.h:39
Interface for a data type for a field.
Definition type.h:44
ICEBERG_EXPORT const std::shared_ptr< BinaryType > & binary()
Return a BinaryType instance.
std::shared_ptr< ListType > list(SchemaField element)
Create a ListType with the given element field.
Definition type.cc:392
ICEBERG_EXPORT const std::shared_ptr< IntType > & int32()
Return an IntType instance.
std::shared_ptr< StructType > struct_(std::vector< SchemaField > fields)
Create a StructType with the given fields.
Definition type.cc:396
ICEBERG_EXPORT const std::shared_ptr< BooleanType > & boolean()
Return a BooleanType instance.
ICEBERG_EXPORT const std::shared_ptr< LongType > & int64()
Return a LongType instance.
ManifestContent
The type of files tracked by the manifest, either data or delete files; 0 for all v1 manifests.
Definition manifest_list.h:77
@ kData
The manifest content is data.
@ kDeletes
The manifest content is deletes.
ICEBERG_EXPORT constexpr Result< ManifestContent > ManifestContentFromString(std::string_view str) noexcept
Get the manifest content type that corresponds to the given name.
Definition manifest_list.h:269
STL namespace.
Entry in a manifest list.
Definition manifest_list.h:85
std::vector< PartitionFieldSummary > partitions
Definition manifest_list.h:139
std::optional< int64_t > deleted_rows_count
Definition manifest_list.h:134
std::vector< uint8_t > key_metadata
Definition manifest_list.h:142
std::optional< int32_t > existing_files_count
Definition manifest_list.h:118
bool has_existing_files() const
Checks if this manifest file contains entries with EXISTING status.
Definition manifest_list.h:151
std::optional< int64_t > added_rows_count
Definition manifest_list.h:126
std::optional< int64_t > first_row_id
Definition manifest_list.h:145
std::string manifest_path
Definition manifest_list.h:88
std::optional< int32_t > added_files_count
Definition manifest_list.h:114
std::optional< int32_t > deleted_files_count
Definition manifest_list.h:122
bool has_added_files() const
Checks if this manifest file contains entries with ADDED status.
Definition manifest_list.h:148
std::optional< int64_t > existing_rows_count
Definition manifest_list.h:130
bool has_deleted_files() const
Checks if this manifest file contains entries with DELETED status.
Definition manifest_list.h:154
Definition manifest_list.h:252
std::vector< ManifestFile > entries
Entries in a manifest list.
Definition manifest_list.h:254
Field summary for partition field in the spec.
Definition manifest_list.h:43
std::optional< std::vector< uint8_t > > lower_bound
Definition manifest_list.h:53
std::optional< std::vector< uint8_t > > upper_bound
Definition manifest_list.h:57
std::optional< bool > contains_nan
Definition manifest_list.h:49