iceberg-cpp
Loading...
Searching...
No Matches
file_metadata.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <cstdint>
26#include <optional>
27#include <string>
28#include <string_view>
29#include <unordered_map>
30#include <vector>
31
32#include "iceberg/iceberg_data_export.h"
33#include "iceberg/result.h"
34
35namespace iceberg::puffin {
36
/// \brief Compression codecs supported by Puffin files.
// NOTE(review): the declaration line (file_metadata.h:38) is absent from this
// listing; `enum class` is assumed here -- confirm against the original header.
enum class PuffinCompressionCodec {
  kNone,
  kLz4,
  kZstd,
};
43
44ICEBERG_DATA_EXPORT std::string_view CodecName(PuffinCompressionCodec codec);
45
46ICEBERG_DATA_EXPORT Result<PuffinCompressionCodec> PuffinCompressionCodecFromName(
47 std::string_view codec_name);
48
49ICEBERG_DATA_EXPORT std::string ToString(PuffinCompressionCodec codec);
50
/// \brief Standard blob types defined by the Iceberg specification.
// NOTE(review): the declaration line (file_metadata.h:52) is absent from this
// listing; a plain `struct` without an export macro is assumed -- confirm.
struct StandardBlobTypes {
  /// Blob type identifier "apache-datasketches-theta-v1".
  static constexpr std::string_view kApacheDatasketchesThetaV1 =
      "apache-datasketches-theta-v1";

  /// A serialized deletion vector according to the Iceberg spec.
  static constexpr std::string_view kDeletionVectorV1 = "deletion-vector-v1";
};
61
66 static constexpr std::string_view kCreatedBy = "created-by";
67};
68
70struct ICEBERG_DATA_EXPORT Blob {
72 std::string type;
74 std::vector<int32_t> input_fields;
76 int64_t snapshot_id;
79 std::vector<uint8_t> data;
81 std::optional<PuffinCompressionCodec> requested_compression;
82 std::unordered_map<std::string, std::string> properties;
83
84 friend bool operator==(const Blob& lhs, const Blob& rhs) = default;
85};
86
87ICEBERG_DATA_EXPORT std::string ToString(const Blob& blob);
88
90struct ICEBERG_DATA_EXPORT BlobMetadata {
92 std::string type;
94 std::vector<int32_t> input_fields;
96 int64_t snapshot_id;
99 int64_t offset;
100 int64_t length;
102 std::string compression_codec;
103 std::unordered_map<std::string, std::string> properties;
104
105 friend bool operator==(const BlobMetadata& lhs, const BlobMetadata& rhs) = default;
106};
107
108ICEBERG_DATA_EXPORT std::string ToString(const BlobMetadata& blob_metadata);
109
111struct ICEBERG_DATA_EXPORT FileMetadata {
112 std::vector<BlobMetadata> blobs;
113 std::unordered_map<std::string, std::string> properties;
114
115 friend bool operator==(const FileMetadata& lhs, const FileMetadata& rhs) = default;
116};
117
118ICEBERG_DATA_EXPORT std::string ToString(const FileMetadata& file_metadata);
119
120} // namespace iceberg::puffin
PuffinCompressionCodec
Compression codecs supported by Puffin files.
Definition file_metadata.h:38
iceberg::puffin::BlobMetadata
Metadata about a blob stored in a Puffin file footer.
Definition file_metadata.h:90
int64_t sequence_number
Sequence number of the Iceberg table's snapshot the blob was computed from.
Definition file_metadata.h:98
std::string type
See StandardBlobTypes for known types.
Definition file_metadata.h:92
std::string compression_codec
Codec name (e.g. "lz4", "zstd"), or empty if not compressed.
Definition file_metadata.h:102
int64_t snapshot_id
ID of the Iceberg table's snapshot the blob was computed from.
Definition file_metadata.h:96
std::vector< int32_t > input_fields
Ordered list of field IDs the blob was computed from.
Definition file_metadata.h:94
iceberg::puffin::Blob
A blob in a Puffin file.
Definition file_metadata.h:70
int64_t snapshot_id
ID of the Iceberg table's snapshot the blob was computed from.
Definition file_metadata.h:76
std::vector< int32_t > input_fields
Ordered list of field IDs the blob was computed from.
Definition file_metadata.h:74
std::string type
See StandardBlobTypes for known types.
Definition file_metadata.h:72
std::optional< PuffinCompressionCodec > requested_compression
If not set, the writer's default codec will be used.
Definition file_metadata.h:81
int64_t sequence_number
Sequence number of the Iceberg table's snapshot the blob was computed from.
Definition file_metadata.h:78
iceberg::puffin::FileMetadata
Metadata about a Puffin file.
Definition file_metadata.h:111
iceberg::puffin::StandardBlobTypes
Standard blob types defined by the Iceberg specification.
Definition file_metadata.h:52
static constexpr std::string_view kApacheDatasketchesThetaV1
Definition file_metadata.h:55
static constexpr std::string_view kDeletionVectorV1
A serialized deletion vector according to the Iceberg spec.
Definition file_metadata.h:59
Standard file-level properties for Puffin files.
Definition file_metadata.h:63
static constexpr std::string_view kCreatedBy
Definition file_metadata.h:66