iceberg-cpp
Loading...
Searching...
No Matches
manifest_adapter_internal.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <memory>
26#include <optional>
27#include <unordered_map>
28#include <vector>
29
31#include "iceberg/result.h"
32#include "iceberg/type_fwd.h"
33
34namespace iceberg {
35
37class ICEBERG_EXPORT ManifestAdapter {
38 public:
39 ManifestAdapter() = default;
40 virtual ~ManifestAdapter() = default;
41 virtual Status Init() = 0;
42
43 Status StartAppending();
44 Result<ArrowArray*> FinishAppending();
45 int64_t size() const { return size_; }
46 const std::unordered_map<std::string, std::string>& metadata() const {
47 return metadata_;
48 }
49
50 protected:
51 ArrowArray array_;
52 // Arrow schema of manifest or manifest list depending on the subclass
53 ArrowSchema schema_;
54 // Number of appended elements in the array
55 int64_t size_ = 0;
56 std::unordered_map<std::string, std::string> metadata_;
57};
58
61class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
62 public:
63 ManifestEntryAdapter(std::optional<int64_t> snapshot_id_,
64 std::shared_ptr<PartitionSpec> partition_spec,
65 std::shared_ptr<Schema> current_schema, ManifestContent content);
66
67 ~ManifestEntryAdapter() override;
68
69 virtual Status Append(const ManifestEntry& entry) = 0;
70
71 const std::shared_ptr<Schema>& schema() const { return manifest_schema_; }
72
73 ManifestContent content() const { return content_; }
74
75 std::optional<int64_t> snapshot_id() const { return snapshot_id_; }
76
77 const std::shared_ptr<PartitionSpec>& partition_spec() const { return partition_spec_; }
78
79 const std::shared_ptr<StructType>& partition_type() const { return partition_type_; }
80
81 protected:
82 Status AppendInternal(const ManifestEntry& entry);
83 Status AppendDataFile(ArrowArray* array,
84 const std::shared_ptr<StructType>& data_file_type,
85 const DataFile& file);
86 static Status AppendPartitionValues(ArrowArray* array,
87 const std::shared_ptr<StructType>& partition_type,
88 const PartitionValues& partition_values);
89
90 virtual Result<std::optional<int64_t>> GetSequenceNumber(
91 const ManifestEntry& entry) const;
92 virtual Result<std::optional<std::string>> GetReferenceDataFile(
93 const DataFile& file) const;
94 virtual Result<std::optional<int64_t>> GetFirstRowId(const DataFile& file) const;
95 virtual Result<std::optional<int64_t>> GetContentOffset(const DataFile& file) const;
96 virtual Result<std::optional<int64_t>> GetContentSizeInBytes(
97 const DataFile& file) const;
98
99 protected:
100 std::optional<int64_t> snapshot_id_;
101 std::shared_ptr<PartitionSpec> partition_spec_;
102 std::shared_ptr<Schema> current_schema_;
103 std::shared_ptr<StructType> partition_type_;
104 std::shared_ptr<Schema> manifest_schema_;
105 const ManifestContent content_;
106};
107
110class ICEBERG_EXPORT ManifestFileAdapter : public ManifestAdapter {
111 public:
112 ManifestFileAdapter() = default;
113 ~ManifestFileAdapter() override;
114
115 virtual Status Append(const ManifestFile& file) = 0;
116
117 const std::shared_ptr<Schema>& schema() const { return manifest_list_schema_; }
118
119 virtual std::optional<int64_t> next_row_id() const { return std::nullopt; }
120
121 protected:
122 Status AppendInternal(const ManifestFile& file);
123 static Status AppendPartitionSummary(
124 ArrowArray* array, const std::shared_ptr<ListType>& summary_type,
125 const std::vector<PartitionFieldSummary>& summaries);
126
127 virtual Result<int64_t> GetSequenceNumber(const ManifestFile& file) const;
128 virtual Result<int64_t> GetMinSequenceNumber(const ManifestFile& file) const;
129 virtual Result<std::optional<int64_t>> GetFirstRowId(const ManifestFile& file) const;
130
131 protected:
132 std::shared_ptr<Schema> manifest_list_schema_;
133};
134
135} // namespace iceberg
Base class for appending manifest metadata to Arrow arrays.
Definition manifest_adapter_internal.h:37
Adapter for appending a list of ManifestEntrys to an ArrowArray. Implemented by different versions wi...
Definition manifest_adapter_internal.h:61
Adapter for appending a list of ManifestFiles to an ArrowArray. Implemented by different versions wit...
Definition manifest_adapter_internal.h:110
StructLike wrapper for a vector of literals that represent partition values.
Definition partition_values.h:36
ManifestContent
The type of files tracked by the manifest, either data or delete files; 0 for all v1 manifests.
Definition manifest_list.h:77
Definition arrow_c_data.h:57
Definition arrow_c_data.h:41
DataFile carries data file path, partition tuple, metrics, ...
Definition manifest_entry.h:62
A manifest is an immutable Avro file that lists data files or delete files, along with each file's pa...
Definition manifest_entry.h:307
Entry in a manifest list.
Definition manifest_list.h:85