/// Format of the file; defaults to Parquet.
79 FileFormatType file_format = FileFormatType::kParquet;
/// Record count for the file; defaults to 0.
86 int64_t record_count = 0;
/// File size in bytes; defaults to 0.
89 int64_t file_size_in_bytes = 0;
/// Field id (134) and schema definition of the optional int32 "content" field.
/// The doc string enumerates the encoded values: 0=data, 1=position deletes,
/// 2=equality deletes.
182 static constexpr int32_t kContentFieldId = 134;
183 inline static const SchemaField kContent = SchemaField::MakeOptional(
184 kContentFieldId,
"content",
int32(),
185 "Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
/// Field id (100) and schema definition of the required string "file_path"
/// field, documented as a location URI including the file-system scheme.
187 static constexpr int32_t kFilePathFieldId = 100;
188 inline static const SchemaField kFilePath = SchemaField::MakeRequired(
189 kFilePathFieldId,
"file_path",
string(),
"Location URI with FS scheme");
/// Field id (101) of the required string "file_format" field.
191 static constexpr int32_t kFileFormatFieldId = 101;
// NOTE(review): the declarator line that names this SchemaField is not visible
// in this excerpt; only the initializer expression below is shown.
// The initializer builds a required string field whose doc lists the accepted
// format names: avro, orc, or parquet.
193 SchemaField::MakeRequired(kFileFormatFieldId,
"file_format",
string(),
194 "File format name: avro, orc, or parquet");
/// Field id (102), field name and doc string for the "partition" field.
/// Unlike its neighbours, only the name and doc are stored here rather than a
/// ready-made SchemaField; the doc string says the tuple's schema is based on
/// the partition spec.
196 static constexpr int32_t kPartitionFieldId = 102;
197 inline static const std::string kPartitionField =
"partition";
198 inline static const std::string kPartitionDoc =
199 "Partition data tuple, schema based on the partition spec";
/// Field id (103) and schema definition of the required int64 "record_count"
/// field (number of records in the file).
201 static constexpr int32_t kRecordCountFieldId = 103;
202 inline static const SchemaField kRecordCount = SchemaField::MakeRequired(
203 kRecordCountFieldId,
"record_count",
int64(),
"Number of records in the file");
/// Field id (104) and schema definition of the required int64
/// "file_size_in_bytes" field (total file size in bytes).
205 static constexpr int32_t kFileSizeFieldId = 104;
206 inline static const SchemaField kFileSize = SchemaField::MakeRequired(
207 kFileSizeFieldId,
"file_size_in_bytes",
int64(),
"Total file size in bytes");
/// Field id (108) and schema definition of the optional "column_sizes" field:
/// a map with required int32 keys (id 117) and required int64 values (id 118),
/// documented as column id -> total size on disk.
209 static constexpr int32_t kColumnSizesFieldId = 108;
210 inline static const SchemaField kColumnSizes = SchemaField::MakeOptional(
211 kColumnSizesFieldId,
"column_sizes",
212 map(SchemaField::MakeRequired(117, std::string(MapType::kKeyName),
int32()),
213 SchemaField::MakeRequired(118, std::string(MapType::kValueName),
int64())),
214 "Map of column id to total size on disk");
/// Field id (109) and schema definition of the optional "value_counts" field:
/// a map with required int32 keys (id 119) and required int64 values (id 120),
/// documented as column id -> total count, including null and NaN.
216 static constexpr int32_t kValueCountsFieldId = 109;
217 inline static const SchemaField kValueCounts = SchemaField::MakeOptional(
218 kValueCountsFieldId,
"value_counts",
219 map(SchemaField::MakeRequired(119, std::string(MapType::kKeyName),
int32()),
220 SchemaField::MakeRequired(120, std::string(MapType::kValueName),
int64())),
221 "Map of column id to total count, including null and NaN");
/// Field id (110) and schema definition of the optional "null_value_counts"
/// field: a map with required int32 keys (id 121) and required int64 values
/// (id 122), documented as column id -> null value count.
223 static constexpr int32_t kNullValueCountsFieldId = 110;
224 inline static const SchemaField kNullValueCounts = SchemaField::MakeOptional(
225 kNullValueCountsFieldId,
"null_value_counts",
226 map(SchemaField::MakeRequired(121, std::string(MapType::kKeyName),
int32()),
227 SchemaField::MakeRequired(122, std::string(MapType::kValueName),
int64())),
228 "Map of column id to null value count");
/// Field id (137) and schema definition of the optional "nan_value_counts"
/// field: a map with required int32 keys (id 138) and required int64 values
/// (id 139), documented as column id -> number of NaN values in the column.
230 static constexpr int32_t kNanValueCountsFieldId = 137;
231 inline static const SchemaField kNanValueCounts = SchemaField::MakeOptional(
232 kNanValueCountsFieldId,
"nan_value_counts",
233 map(SchemaField::MakeRequired(138, std::string(MapType::kKeyName),
int32()),
234 SchemaField::MakeRequired(139, std::string(MapType::kValueName),
int64())),
235 "Map of column id to number of NaN values in the column");
/// Field id (125) and schema definition of the optional "lower_bounds" field:
/// a map with required int32 keys (id 126) and required binary values (id 127),
/// documented as column id -> lower bound.
237 static constexpr int32_t kLowerBoundsFieldId = 125;
238 inline static const SchemaField kLowerBounds = SchemaField::MakeOptional(
239 kLowerBoundsFieldId,
"lower_bounds",
240 map(SchemaField::MakeRequired(126, std::string(MapType::kKeyName),
int32()),
241 SchemaField::MakeRequired(127, std::string(MapType::kValueName),
binary())),
242 "Map of column id to lower bound");
/// Field id (128) and schema definition of the optional "upper_bounds" field:
/// a map with required int32 keys (id 129) and required binary values (id 130),
/// documented as column id -> upper bound.
244 static constexpr int32_t kUpperBoundsFieldId = 128;
245 inline static const SchemaField kUpperBounds = SchemaField::MakeOptional(
246 kUpperBoundsFieldId,
"upper_bounds",
247 map(SchemaField::MakeRequired(129, std::string(MapType::kKeyName),
int32()),
248 SchemaField::MakeRequired(130, std::string(MapType::kValueName),
binary())),
249 "Map of column id to upper bound");
/// Field id (131) and schema definition of the optional binary "key_metadata"
/// field (encryption key metadata blob).
251 static constexpr int32_t kKeyMetadataFieldId = 131;
252 inline static const SchemaField kKeyMetadata = SchemaField::MakeOptional(
253 kKeyMetadataFieldId,
"key_metadata",
binary(),
"Encryption key metadata blob");
/// Field id (132) and schema definition of the optional "split_offsets" field:
/// a list of required int64 elements (element id 133), documented as
/// splittable offsets.
255 static constexpr int32_t kSplitOffsetsFieldId = 132;
256 inline static const SchemaField kSplitOffsets = SchemaField::MakeOptional(
257 kSplitOffsetsFieldId,
"split_offsets",
258 list(SchemaField::MakeRequired(133, std::string(ListType::kElementName),
int64())),
259 "Splittable offsets");
/// Field id (135) and schema definition of the optional "equality_ids" field:
/// a list of required int32 elements (element id 136), documented as the
/// field IDs used for equality comparison.
261 static constexpr int32_t kEqualityIdsFieldId = 135;
262 inline static const SchemaField kEqualityIds = SchemaField::MakeOptional(
263 kEqualityIdsFieldId,
"equality_ids",
264 list(SchemaField::MakeRequired(136, std::string(ListType::kElementName),
int32())),
265 "Equality comparison field IDs");
/// Field id (140) and schema definition of the optional int32 "sort_order_id"
/// field.
267 static constexpr int32_t kSortOrderIdFieldId = 140;
268 inline static const SchemaField kSortOrderId = SchemaField::MakeOptional(
269 kSortOrderIdFieldId,
"sort_order_id",
int32(),
"Sort order ID");
/// Field id (142) of the optional int64 "first_row_id" field.
271 static constexpr int32_t kFirstRowIdFieldId = 142;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. The initializer builds an optional int64 field documented as
// the starting row ID to assign to new rows.
273 SchemaField::MakeOptional(kFirstRowIdFieldId,
"first_row_id",
int64(),
274 "Starting row ID to assign to new rows");
/// Field id (143) and schema definition of the optional string
/// "referenced_data_file" field.
// NOTE(review): the doc string below is the first piece of a concatenated
// literal; its continuation and the closing of the call are not visible in
// this excerpt.
276 static constexpr int32_t kReferencedDataFileFieldId = 143;
277 inline static const SchemaField kReferencedDataFile = SchemaField::MakeOptional(
278 kReferencedDataFileFieldId,
"referenced_data_file",
string(),
279 "Fully qualified location (URI with FS scheme) of a data file that all deletes "
/// Field id (144) of the optional int64 "content_offset" field.
282 static constexpr int32_t kContentOffsetFieldId = 144;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. The initializer builds an optional int64 field documented as
// the offset in the file where the content starts.
284 SchemaField::MakeOptional(kContentOffsetFieldId,
"content_offset",
int64(),
285 "The offset in the file where the content starts");
/// Field id (145) of the optional int64 "content_size_in_bytes" field.
287 static constexpr int32_t kContentSizeFieldId = 145;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. The initializer builds an optional int64 field documented as
// the length of the referenced content stored in the file.
289 SchemaField::MakeOptional(kContentSizeFieldId,
"content_size_in_bytes",
int64(),
290 "The length of referenced content stored in the file");
/// Compiler-generated (defaulted) equality: two DataFile objects compare equal
/// when all of their members compare equal.
292 bool operator==(
const DataFile& other)
const =
default;
/// Returns a struct type derived from the given partition struct type.
/// NOTE(review): the definition is not visible here; presumably this is the
/// DataFile schema with the "partition" field typed as `partition_type` —
/// confirm against the implementation.
/// \param partition_type Struct type describing the partition tuple.
295 static std::shared_ptr<StructType>
Type(std::shared_ptr<StructType> partition_type);
// True only when this file holds position deletes AND is stored in the Puffin
// format. NOTE(review): the enclosing function's signature is not visible in
// this excerpt.
299 return content == Content::kPositionDeletes && file_format == FileFormatType::kPuffin;
/// Status of this manifest entry; defaults to kAdded.
311 ManifestStatus status = ManifestStatus::kAdded;
/// Field id (0) of the required int32 "status" field.
346 static constexpr int32_t kStatusFieldId = 0;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. No doc string is passed for this field.
348 SchemaField::MakeRequired(kStatusFieldId,
"status",
int32());
/// Field id (1) of the optional int64 "snapshot_id" field.
350 static constexpr int32_t kSnapshotIdFieldId = 1;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. No doc string is passed for this field.
352 SchemaField::MakeOptional(kSnapshotIdFieldId,
"snapshot_id",
int64());
/// Field id (2) and field name of the "data_file" field. Only the name is
/// stored here, not a ready-made SchemaField.
354 static constexpr int32_t kDataFileFieldId = 2;
355 inline static const std::string kDataFileField =
"data_file";
/// Field id (3) of the optional int64 "sequence_number" field.
357 static constexpr int32_t kSequenceNumberFieldId = 3;
// NOTE(review): the declarator line naming this SchemaField is not visible in
// this excerpt. No doc string is passed for this field.
359 SchemaField::MakeOptional(kSequenceNumberFieldId,
"sequence_number",
int64());
/// Field id (4) and schema definition of the optional int64
/// "file_sequence_number" field. No doc string is passed for this field.
361 static constexpr int32_t kFileSequenceNumberFieldId = 4;
362 inline static const SchemaField kFileSequenceNumber = SchemaField::MakeOptional(
363 kFileSequenceNumberFieldId,
"file_sequence_number",
int64());
// An entry counts as alive when its status is kAdded or kExisting; any other
// status yields false. NOTE(review): the enclosing function's signature is not
// visible in this excerpt.
367 return status == ManifestStatus::kAdded || status == ManifestStatus::kExisting;
// NOTE(review): the function header and the declaration of `copy` are not
// visible in this excerpt, nor are the closing brace of the `if` and the
// return statement.
// Mark the copy as added.
372 copy.
status = ManifestStatus::kAdded;
// If the data file carries a first_row_id, give the copy its own DataFile
// object and clear first_row_id on it. The fresh DataFile is presumably made
// so that the original entry's data file is not modified — confirm.
373 if (copy.data_file->first_row_id.has_value()) {
374 copy.data_file = std::make_unique<DataFile>(*copy.data_file);
375 copy.data_file->first_row_id = std::nullopt;
/// Copies this entry and sets the copy's status to kExisting; this entry is
/// left unchanged (the method is const).
// NOTE(review): the return statement and closing brace are not visible in this
// excerpt.
380 ManifestEntry AsExisting()
const {
381 ManifestEntry copy = *
this;
382 copy.
status = ManifestStatus::kExisting;
/// Copies this entry and sets the copy's status to kDeleted; this entry is
/// left unchanged (the method is const).
// NOTE(review): the return statement and closing brace are not visible in this
// excerpt.
386 ManifestEntry AsDeleted()
const {
387 ManifestEntry copy = *
this;
388 copy.
status = ManifestStatus::kDeleted;
/// Equality comparison between two manifest entries. Declared here and defined
/// elsewhere (not defaulted).
/// NOTE(review): presumably user-defined because `data_file` is a shared_ptr
/// and should be compared by pointee rather than by address — confirm against
/// the definition.
392 bool operator==(
const ManifestEntry& other)
const;
/// Returns a struct type built from a partition struct type.
/// NOTE(review): the definition is not visible here; presumably the manifest
/// entry schema whose data-file struct uses `partition_type` — confirm.
/// \param partition_type Struct type describing the partition tuple.
394 static std::shared_ptr<StructType> TypeFromPartitionType(
395 std::shared_ptr<StructType> partition_type);
/// Returns a struct type built from an already-constructed data-file struct
/// type.
/// NOTE(review): the definition is not visible here; presumably the manifest
/// entry schema with `datafile_type` as the type of the "data_file" field —
/// confirm.
/// \param datafile_type Struct type describing the data file.
396 static std::shared_ptr<StructType> TypeFromDataFileType(
397 std::shared_ptr<StructType> datafile_type);
ICEBERG_EXPORT constexpr Result< ManifestStatus > ManifestStatusFromInt(int32_t status) noexcept
Get the manifest status corresponding to the given int.
Definition manifest_entry.h:47
ICEBERG_EXPORT constexpr Result< DataFile::Content > DataFileContentFromInt(int32_t content) noexcept
Get the data file content type corresponding to the given int.
Definition manifest_entry.h:414
A manifest is an immutable Avro file that lists data files or delete files, along with each file's partition data tuple, metrics, and tracking information.
Definition manifest_entry.h:307
std::optional< int64_t > snapshot_id
Definition manifest_entry.h:315
std::optional< int64_t > file_sequence_number
Definition manifest_entry.h:341
std::optional< int64_t > sequence_number
Definition manifest_entry.h:328
constexpr bool IsAlive() const
Check if this manifest entry is alive, i.e. its status is added or existing (not deleted).
Definition manifest_entry.h:366
std::shared_ptr< DataFile > data_file
Definition manifest_entry.h:344
ManifestStatus status
Definition manifest_entry.h:311