iceberg-cpp
Loading...
Searching...
No Matches
manifest_group.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <functional>
26#include <memory>
27#include <string>
28#include <unordered_map>
29#include <unordered_set>
30#include <vector>
31
33#include "iceberg/iceberg_export.h"
36#include "iceberg/result.h"
37#include "iceberg/type_fwd.h"
39
40namespace iceberg {
41
43struct ICEBERG_EXPORT TaskContext {
44 public:
45 std::shared_ptr<PartitionSpec> spec;
46 DeleteFileIndex* deletes;
47 ResidualEvaluator* residuals;
48 bool drop_stats;
49 std::unordered_set<int32_t> columns_to_keep_stats;
50};
51
53class ICEBERG_EXPORT ManifestGroup : public ErrorCollector {
54 public:
61 static Result<std::unique_ptr<ManifestGroup>> Make(
62 std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
63 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_,
64 std::vector<ManifestFile> manifests);
65
73 static Result<std::unique_ptr<ManifestGroup>> Make(
74 std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
75 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
76 std::vector<ManifestFile> data_manifests,
77 std::vector<ManifestFile> delete_manifests);
78
79 ~ManifestGroup() override;
80
81 ManifestGroup(ManifestGroup&&) noexcept;
82 ManifestGroup& operator=(ManifestGroup&&) noexcept;
83 ManifestGroup(const ManifestGroup&) = delete;
84 ManifestGroup& operator=(const ManifestGroup&) = delete;
85
87 ManifestGroup& FilterData(std::shared_ptr<Expression> filter);
88
90 ManifestGroup& FilterFiles(std::shared_ptr<Expression> filter);
91
93 ManifestGroup& FilterPartitions(std::shared_ptr<Expression> filter);
94
98 ManifestGroup& FilterManifestEntries(
99 std::function<bool(const ManifestEntry&)> predicate);
100
102 ManifestGroup& IgnoreDeleted();
103
105 ManifestGroup& IgnoreExisting();
106
108 ManifestGroup& IgnoreResiduals();
109
113 ManifestGroup& Select(std::vector<std::string> columns);
114
116 ManifestGroup& CaseSensitive(bool case_sensitive);
117
121 ManifestGroup& ColumnsToKeepStats(std::unordered_set<int32_t> column_ids);
122
124 Result<std::vector<std::shared_ptr<FileScanTask>>> PlanFiles();
125
127 Result<std::vector<ManifestEntry>> Entries();
128
129 using CreateTasksFunction =
130 std::function<Result<std::vector<std::shared_ptr<ScanTask>>>(
131 std::vector<ManifestEntry>&&, const TaskContext&)>;
132
137 Result<std::vector<std::shared_ptr<ScanTask>>> Plan(
138 const CreateTasksFunction& create_tasks);
139
140 private:
141 ManifestGroup(std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
142 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
143 std::vector<ManifestFile> data_manifests,
144 DeleteFileIndex::Builder&& delete_index_builder);
145
146 Result<std::unordered_map<int32_t, std::vector<ManifestEntry>>> ReadEntries();
147
148 Result<std::unique_ptr<ManifestReader>> MakeReader(const ManifestFile& manifest);
149
150 std::shared_ptr<FileIO> io_;
151 std::shared_ptr<Schema> schema_;
152 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
153 std::vector<ManifestFile> data_manifests_;
154 DeleteFileIndex::Builder delete_index_builder_;
155 std::shared_ptr<Expression> data_filter_;
156 std::shared_ptr<Expression> file_filter_;
157 std::shared_ptr<Expression> partition_filter_;
158 std::function<bool(const ManifestEntry&)> manifest_entry_predicate_;
159 std::vector<std::string> columns_;
160 std::unordered_set<int32_t> columns_to_keep_stats_;
161 bool case_sensitive_ = true;
162 bool ignore_deleted_ = false;
163 bool ignore_existing_ = false;
164 bool ignore_residuals_ = false;
165};
166
167} // namespace iceberg
An index of delete files by sequence number.
Definition delete_file_index.h:228
Base class for collecting errors in the builder pattern.
Definition error_collector.h:93
Coordinates reading manifest files and producing scan tasks.
Definition manifest_group.h:53
Finds the residuals for an Expression using the partitions in the given PartitionSpec.
Definition residual_evaluator.h:47
A manifest is an immutable Avro file that lists data files or delete files, along with each file's partition data tuple, metrics, and tracking information.
Definition manifest_entry.h:307
Entry in a manifest list.
Definition manifest_list.h:85
Context passed to task creation functions.
Definition manifest_group.h:43