iceberg-cpp
Loading...
Searching...
No Matches
manifest_reader_internal.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <memory>
26#include <optional>
27#include <string>
28#include <vector>
29
33#include "iceberg/file_reader.h"
36#include "iceberg/util/partition_value_util.h"
37
38namespace iceberg {
39
46 public:
57 ManifestReaderImpl(std::string manifest_path, std::optional<int64_t> manifest_length,
58 std::shared_ptr<FileIO> file_io, std::shared_ptr<Schema> schema,
59 std::shared_ptr<PartitionSpec> spec,
60 std::unique_ptr<InheritableMetadata> inheritable_metadata,
61 std::optional<int64_t> first_row_id);
62
63 Result<std::vector<ManifestEntry>> Entries() override;
64
65 Result<std::vector<ManifestEntry>> LiveEntries() override;
66
67 ManifestReader& Select(const std::vector<std::string>& columns) override;
68
69 ManifestReader& FilterPartitions(std::shared_ptr<Expression> expr) override;
70
71 ManifestReader& FilterPartitions(std::shared_ptr<PartitionSet> partition_set) override;
72
73 ManifestReader& FilterRows(std::shared_ptr<Expression> expr) override;
74
75 ManifestReader& CaseSensitive(bool case_sensitive) override;
76
77 ManifestReader& TryDropStats() override;
78
79 private:
81 Result<std::vector<ManifestEntry>> ReadEntries(bool only_live);
82
84 Status OpenReader(std::shared_ptr<Schema> projection);
85
87 bool HasPartitionFilter() const;
88
90 bool HasRowFilter() const;
91
93 Result<Evaluator*> GetEvaluator();
94
96 Result<InclusiveMetricsEvaluator*> GetMetricsEvaluator();
97
99 Result<bool> InPartitionSet(const DataFile& file) const;
100
101 // Fields set at construction
102 const std::string manifest_path_;
103 const std::optional<int64_t> manifest_length_;
104 const std::shared_ptr<FileIO> file_io_;
105 const std::shared_ptr<Schema> schema_;
106 const std::shared_ptr<PartitionSpec> spec_;
107 const std::unique_ptr<InheritableMetadata> inheritable_metadata_;
108 std::optional<int64_t> first_row_id_;
109
110 // Configuration fields
111 std::vector<std::string> columns_;
112 std::shared_ptr<Expression> part_filter_{True::Instance()};
113 std::shared_ptr<Expression> row_filter_{True::Instance()};
114 std::shared_ptr<PartitionSet> partition_set_;
115 bool case_sensitive_{true};
116 bool drop_stats_{false};
117
118 // Lazy fields
119 std::unique_ptr<Reader> file_reader_;
120 std::shared_ptr<Schema> file_schema_;
121 std::unique_ptr<Evaluator> evaluator_;
122 std::unique_ptr<InclusiveMetricsEvaluator> metrics_evaluator_;
123};
124
127 public:
128 explicit ManifestListReaderImpl(std::unique_ptr<Reader> reader,
129 std::shared_ptr<Schema> schema)
130 : schema_(std::move(schema)), reader_(std::move(reader)) {}
131
132 Result<std::vector<ManifestFile>> Files() const override;
133
134 Result<std::unordered_map<std::string, std::string>> Metadata() const override;
135
136 private:
137 std::shared_ptr<Schema> schema_;
138 std::unique_ptr<Reader> reader_;
139};
140
/// \brief Index of each field of a manifest file record.
///
/// The enumerators are numbered consecutively from 0, and the underlying type
/// is fixed to int32_t.
/// NOTE(review): the indices presumably follow the field order of the manifest
/// list schema -- confirm against the reader implementation.
enum class ManifestFileField : int32_t {
  kManifestPath = 0,
  kManifestLength = 1,
  kPartitionSpecId = 2,
  kContent = 3,
  kSequenceNumber = 4,
  kMinSequenceNumber = 5,
  kAddedSnapshotId = 6,
  kAddedFilesCount = 7,
  kExistingFilesCount = 8,
  kDeletedFilesCount = 9,
  kAddedRowsCount = 10,
  kExistingRowsCount = 11,
  kDeletedRowsCount = 12,
  kPartitionFieldSummary = 13,
  kKeyMetadata = 14,
  kFirstRowId = 15,
  // kNextUnusedId is the placeholder for the next unused index.
  // Always keep this as the last index when adding new fields.
  kNextUnusedId = 16,
};
162
163Result<ManifestFileField> ManifestFileFieldFromIndex(int32_t index);
164
165} // namespace iceberg
Read manifest files from a manifest list file.
Definition manifest_reader_internal.h:126
Result< std::vector< ManifestFile > > Files() const override
Read all manifest files in the manifest list file.
Definition manifest_reader.cc:952
Result< std::unordered_map< std::string, std::string > > Metadata() const override
Get the metadata of the manifest list file.
Definition manifest_reader.cc:972
Read manifest files from a manifest list file.
Definition manifest_reader.h:117
Read manifest entries from a manifest file.
Definition manifest_reader_internal.h:45
ManifestReader & Select(const std::vector< std::string > &columns) override
Select specific columns of the data file to read from the manifest entries.
Definition manifest_reader.cc:738
ManifestReader & FilterPartitions(std::shared_ptr< Expression > expr) override
Filter manifest entries by partition filter.
Definition manifest_reader.cc:743
ManifestReader & CaseSensitive(bool case_sensitive) override
Set case sensitivity for column name matching.
Definition manifest_reader.cc:759
Result< std::vector< ManifestEntry > > LiveEntries() override
Read only live (non-deleted) manifest entries.
Definition manifest_reader.cc:855
ManifestReader & FilterRows(std::shared_ptr< Expression > expr) override
Filter manifest entries by row-level filter.
Definition manifest_reader.cc:754
Result< std::vector< ManifestEntry > > Entries() override
Read all manifest entries in the manifest file.
Definition manifest_reader.cc:851
ManifestReader & TryDropStats() override
Try to drop stats from returned DataFile objects.
Definition manifest_reader.cc:764
Read manifest entries from a manifest file.
Definition manifest_reader.h:39
static const std::shared_ptr< True > & Instance()
Returns the singleton instance.
Definition expression.cc:32
DataFile carries data file path, partition tuple, metrics, ...
Definition manifest_entry.h:62