iceberg-cpp
Loading...
Searching...
No Matches
type_util.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
22#include <functional>
23#include <memory>
24#include <string>
25#include <string_view>
26#include <unordered_map>
27#include <unordered_set>
28#include <vector>
29
30#include "iceberg/iceberg_export.h"
31#include "iceberg/result.h"
33#include "iceberg/type_fwd.h"
34#include "iceberg/util/string_util.h"
35
38
39namespace iceberg {
40
/// \brief Visitor for building a map from field ID to SchemaField reference.
///
/// NOTE(review): the class-head line (type_util.h:42) is elided from this
/// listing; only the class body is visible below.
43 public:
/// \param id_to_field Output map taken by non-const reference; it is keyed by
/// int32_t and holds std::reference_wrapper<const SchemaField> values.
/// Presumably bound to id_to_field_ -- confirm in the implementation file.
44 explicit IdToFieldVisitor(
45 std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
46 id_to_field);
/// Overloads for a type without child fields (PrimitiveType) and a type with
/// child fields (NestedType); each reports its outcome as a Status.
47 Status Visit(const PrimitiveType& type);
48 Status Visit(const NestedType& type);
49
50 private:
/// Reference member: the visitor does not own the map, so whatever it is bound
/// to must outlive the visitor.
51 std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& id_to_field_;
52};
53
/// \brief Visitor for building maps from field name to field ID and field ID to
/// field name.
///
/// NOTE(review): the class-head line (type_util.h:56) is elided from this
/// listing; only the class body is visible below.
57 public:
/// \param name_to_id Output map from name to field ID, taken by non-const
/// reference. It uses the transparent StringHash / std::equal_to<> pair, so it
/// supports std::string_view lookup keys.
/// \param id_to_name Output map from field ID to name, passed as a raw pointer.
/// NOTE(review): presumably optional (may be nullptr) -- confirm.
/// \param case_sensitive Defaults to true. NOTE(review): presumably controls
/// whether names are normalised before insertion -- confirm.
/// \param quoting_func Defaults to an empty std::function. NOTE(review):
/// presumably applied to name components when building paths -- confirm.
58 explicit NameToIdVisitor(
59 std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>& name_to_id,
60 std::unordered_map<int32_t, std::string>* id_to_name, bool case_sensitive = true,
61 std::function<std::string(std::string_view)> quoting_func = {});
/// One Visit overload per type category (list, map, struct, primitive). Each
/// receives two path strings, `path` and `short_path`, and returns a Status.
62 Status Visit(const ListType& type, const std::string& path,
63 const std::string& short_path);
64 Status Visit(const MapType& type, const std::string& path,
65 const std::string& short_path);
66 Status Visit(const StructType& type, const std::string& path,
67 const std::string& short_path);
68 Status Visit(const PrimitiveType& type, const std::string& path,
69 const std::string& short_path);
/// Finalisation step with no return value. NOTE(review): presumably merges
/// short_name_to_id_ into name_to_id_ -- confirm in the implementation file.
70 void Finish();
71
72 private:
/// Helper that produces a path string from a prefix and a field name.
73 std::string BuildPath(std::string_view prefix, std::string_view field_name,
74 bool case_sensitive);
75
76 private:
77 bool case_sensitive_;
/// Reference member: not owned by the visitor.
78 std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>& name_to_id_;
/// Raw pointer member: not owned by the visitor.
79 std::unordered_map<int32_t, std::string>* id_to_name_;
/// Visitor-owned map (held by value) with the same key/value types as name_to_id_.
80 std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>> short_name_to_id_;
81 std::function<std::string(std::string_view)> quoting_func_;
82};
83
/// \brief Visitor for building a map from field ID to position path.
///
/// NOTE(review): the class-head line (type_util.h:85) is elided from this
/// listing, so the class name is not visible here; only the body is shown.
86 public:
/// One Visit overload per type category; each returns a Status.
87 Status Visit(const PrimitiveType& type);
88 Status Visit(const StructType& type);
89 Status Visit(const ListType& type);
90 Status Visit(const MapType& type);
/// \return A map from int32_t to a vector of size_t, returned by value.
/// NOTE(review): presumably the accumulated position_path_ -- confirm.
91 std::unordered_map<int32_t, std::vector<size_t>> Finish();
92
93 private:
/// Sentinel (-1) used as the initial value of current_field_id_.
94 constexpr static int32_t kUnassignedFieldId = -1;
95 int32_t current_field_id_ = kUnassignedFieldId;
/// NOTE(review): presumably the positions from the root to the node being
/// visited -- confirm.
96 std::vector<size_t> current_path_;
97 std::unordered_map<int32_t, std::vector<size_t>> position_path_;
98};
99
/// \brief Visitor for pruning columns based on selected field IDs.
///
/// NOTE(review): the class-head line (type_util.h:108) is elided from this
/// listing; only the class body is visible below.
109 public:
/// \param selected_ids Set of field IDs, taken by const reference.
/// \param select_full_types Boolean flag. NOTE(review): presumably selects
/// whether a chosen nested field keeps its whole subtree -- confirm.
110 PruneColumnVisitor(const std::unordered_set<int32_t>& selected_ids,
111 bool select_full_types);
112
/// All Visit overloads are const and return Result<std::shared_ptr<Type>>.
113 Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<Type>& type) const;
114 Result<std::shared_ptr<Type>> Visit(const SchemaField& field) const;
/// Static helper that returns a SchemaField by value, given an existing field
/// and a type. NOTE(review): presumably copies `field` with `type` substituted
/// -- confirm.
115 static SchemaField MakeField(const SchemaField& field, std::shared_ptr<Type> type);
116 Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<StructType>& type) const;
117 Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<ListType>& type) const;
118 Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<MapType>& type) const;
119
120 private:
/// Const reference member: the visitor does not own the set, so whatever it is
/// bound to must outlive the visitor.
121 const std::unordered_set<int32_t>& selected_ids_;
122 const bool select_full_types_;
123};
124
/// \brief Visitor for getting projected field IDs.
///
/// NOTE(review): the class-head line (type_util.h:126) is elided from this
/// listing; only the class body is visible below.
127 public:
/// \param include_struct_ids Defaults to false. NOTE(review): presumably
/// controls whether IDs of struct-typed fields are collected -- confirm.
128 explicit GetProjectedIdsVisitor(bool include_struct_ids = false);
129
/// Visit takes any Type; VisitNested and VisitPrimitive take the nested and
/// primitive categories respectively. Each returns a Status.
130 Status Visit(const Type& type);
131 Status VisitNested(const NestedType& type);
132 Status VisitPrimitive(const PrimitiveType& type);
/// \return A set of int32_t IDs, returned by value; const, so it does not
/// modify the visitor.
133 std::unordered_set<int32_t> Finish() const;
134
/// Static entry point with the same `include_struct_ids` default as the
/// constructor; returns the ID set wrapped in a Result.
135 static Result<std::unordered_set<int32_t>> GetProjectedIds(
136 const Type& type, bool include_struct_ids = false);
137
138 private:
139 const bool include_struct_ids_;
140 std::unordered_set<int32_t> ids_;
141};
142
/// \brief Builds an int32_t-to-int32_t index from the given root struct.
///
/// NOTE(review): judging by the name, this presumably maps each nested field ID
/// to the ID of its parent field -- confirm against the implementation.
/// \param root_struct The struct type to index.
/// \return The index, returned by value.
151ICEBERG_EXPORT std::unordered_map<int32_t, int32_t> IndexParents(
152 const StructType& root_struct);
153
/// \brief Assigns fresh IDs to all fields in the schema.
///
/// NOTE(review): the class-head line (type_util.h:155) is elided from this
/// listing; only the class body is visible below.
156 public:
/// \param next_id Callable that takes no arguments and returns an int32_t.
/// NOTE(review): presumably invoked once per field to obtain its new ID --
/// confirm.
157 explicit AssignFreshIdVisitor(std::function<int32_t()> next_id);
158
/// Const Visit overloads: the generic one takes and returns a shared_ptr<Type>;
/// the struct, list and map overloads take the type by const reference and
/// return a shared_ptr to the same category of type.
159 std::shared_ptr<Type> Visit(const std::shared_ptr<Type>& type) const;
160 std::shared_ptr<StructType> Visit(const StructType& type) const;
161 std::shared_ptr<ListType> Visit(const ListType& type) const;
162 std::shared_ptr<MapType> Visit(const MapType& type) const;
163
164 private:
165 std::function<int32_t()> next_id_;
166};
167
/// \brief Produces a Schema from an existing schema and an ID-generating callable.
///
/// NOTE(review): presumably every field of `schema` receives a new ID drawn from
/// `next_id` and the result carries `schema_id` -- confirm against the
/// implementation.
/// \param schema_id An int32_t schema identifier.
/// \param schema The input schema, taken by const reference.
/// \param next_id Callable that takes no arguments and returns an int32_t.
/// \return A shared_ptr<Schema> wrapped in a Result.
174ICEBERG_EXPORT Result<std::shared_ptr<Schema>> AssignFreshIds(
175 int32_t schema_id, const Schema& schema, std::function<int32_t()> next_id);
176
/// \brief Reports whether promoting `from_type` to `to_type` is allowed.
///
/// NOTE(review): the promotion rules themselves live in the implementation and
/// are not visible in this header -- consult it before relying on specifics.
/// \param from_type The source type.
/// \param to_type The target type.
/// \return A bool; judging by the name, true when the promotion is allowed.
187ICEBERG_EXPORT bool IsPromotionAllowed(const std::shared_ptr<Type>& from_type,
188 const std::shared_ptr<Type>& to_type);
189
190} // namespace iceberg
Assigns fresh IDs to all fields in the schema.
Definition type_util.h:155
Visitor for getting projected field IDs.
Definition type_util.h:126
Visitor for building a map from field ID to SchemaField reference.
Definition type_util.h:42
A data type representing a list of values.
Definition type.h:145
A data type representing a dictionary of values.
Definition type.h:177
Visitor for building maps from field name to field ID and field ID to field name.
Definition type_util.h:56
A data type that has child fields.
Definition type.h:73
Visitor for building a map from field ID to position path.
Definition type_util.h:85
A data type that does not have child fields.
Definition type.h:66
Visitor for pruning columns based on selected field IDs.
Definition type_util.h:108
A type combined with a name.
Definition schema_field.h:39
A schema for a Table.
Definition schema.h:49
A data type representing a struct with nested fields.
Definition type.h:108
Interface for a data type for a field.
Definition type.h:44
Result< R > Visit(const std::shared_ptr< Expression > &expr, V &visitor)
Traverse an expression tree with a visitor.
Definition expression_visitor.h:283
Transparent hash function that supports std::string_view as lookup key.
Definition string_util.h:137