iceberg-cpp
Loading...
Searching...
No Matches
manifest_reader.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
31
32#include "iceberg/iceberg_export.h"
33#include "iceberg/result.h"
34#include "iceberg/type_fwd.h"
35
36namespace iceberg {
37
39class ICEBERG_EXPORT ManifestReader {
40 public:
41 virtual ~ManifestReader() = default;
42
46 virtual Result<std::vector<ManifestEntry>> Entries() = 0;
47
49 virtual Result<std::vector<ManifestEntry>> LiveEntries() = 0;
50
55 virtual ManifestReader& Select(const std::vector<std::string>& columns) = 0;
56
61 virtual ManifestReader& FilterPartitions(std::shared_ptr<Expression> expr) = 0;
62
65 std::shared_ptr<class PartitionSet> partition_set) = 0;
66
71 virtual ManifestReader& FilterRows(std::shared_ptr<Expression> expr) = 0;
72
74 virtual ManifestReader& CaseSensitive(bool case_sensitive) = 0;
75
78
83 static bool ShouldDropStats(const std::vector<std::string>& columns);
84
91 static Result<std::unique_ptr<ManifestReader>> Make(
92 const ManifestFile& manifest, std::shared_ptr<FileIO> file_io,
93 std::shared_ptr<Schema> schema, std::shared_ptr<PartitionSpec> spec);
94
104 static Result<std::unique_ptr<ManifestReader>> Make(
105 std::string_view manifest_location, std::optional<int64_t> manifest_length,
106 std::shared_ptr<FileIO> file_io, std::shared_ptr<Schema> schema,
107 std::shared_ptr<PartitionSpec> spec,
108 std::unique_ptr<InheritableMetadata> inheritable_metadata,
109 std::optional<int64_t> first_row_id = std::nullopt);
110
112 static std::vector<std::string> WithStatsColumns(
113 const std::vector<std::string>& columns);
114};
115
117class ICEBERG_EXPORT ManifestListReader {
118 public:
119 virtual ~ManifestListReader() = default;
120
122 virtual Result<std::vector<ManifestFile>> Files() const = 0;
123
125 virtual Result<std::unordered_map<std::string, std::string>> Metadata() const = 0;
126
131 static Result<std::unique_ptr<ManifestListReader>> Make(
132 std::string_view manifest_list_location, std::shared_ptr<FileIO> file_io);
133};
134
135} // namespace iceberg
ManifestListReader: Read manifest files from a manifest list file.
Definition manifest_reader.h:117
virtual Result< std::unordered_map< std::string, std::string > > Metadata() const =0
Get the metadata of the manifest list file.
virtual Result< std::vector< ManifestFile > > Files() const =0
Read all manifest files in the manifest list file.
ManifestReader: Read manifest entries from a manifest file.
Definition manifest_reader.h:39
virtual Result< std::vector< ManifestEntry > > Entries()=0
Read all manifest entries in the manifest file.
virtual ManifestReader & FilterRows(std::shared_ptr< Expression > expr)=0
Filter manifest entries by row-level filter.
virtual ManifestReader & Select(const std::vector< std::string > &columns)=0
Select specific columns of data file to read from the manifest entries.
virtual ManifestReader & FilterPartitions(std::shared_ptr< Expression > expr)=0
Filter manifest entries by partition filter.
virtual ManifestReader & FilterPartitions(std::shared_ptr< class PartitionSet > partition_set)=0
Filter manifest entries to a specific set of partitions.
virtual ManifestReader & CaseSensitive(bool case_sensitive)=0
Set case sensitivity for column name matching.
virtual ManifestReader & TryDropStats()=0
Try to drop stats from returned DataFile objects.
virtual Result< std::vector< ManifestEntry > > LiveEntries()=0
Read only live (non-deleted) manifest entries.
Entry in a manifest list.
Definition manifest_list.h:85