iceberg-cpp
Loading...
Searching...
No Matches
avro_schema_util_internal.h
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
22#include <stack>
23
24#include <avro/Node.hh>
25
26#include "iceberg/name_mapping.h"
27#include "iceberg/result.h"
28#include "iceberg/schema_util.h"
29#include "iceberg/type.h"
30
31namespace avro {
32class Schema;
33class ValidSchema;
34} // namespace avro
35
36namespace iceberg::avro {
37
38struct MapLogicalType : public ::avro::CustomLogicalType {
39 MapLogicalType() : ::avro::CustomLogicalType("map") {}
40};
41
44 public:
/// Visit overloads of the Iceberg-to-Avro conversion visitor, one per Iceberg
/// type plus one for SchemaField. Each takes the Iceberg type (or field) and a
/// pointer to an ::avro::NodePtr, and returns a Status.
///
/// NOTE(review): `node` is presumably an output parameter that receives the
/// converted Avro node — confirm in the implementation file.
45 Status Visit(const BooleanType& type, ::avro::NodePtr* node);
46 Status Visit(const IntType& type, ::avro::NodePtr* node);
47 Status Visit(const LongType& type, ::avro::NodePtr* node);
48 Status Visit(const FloatType& type, ::avro::NodePtr* node);
49 Status Visit(const DoubleType& type, ::avro::NodePtr* node);
50 Status Visit(const DecimalType& type, ::avro::NodePtr* node);
51 Status Visit(const DateType& type, ::avro::NodePtr* node);
52 Status Visit(const TimeType& type, ::avro::NodePtr* node);
53 Status Visit(const TimestampType& type, ::avro::NodePtr* node);
54 Status Visit(const TimestampTzType& type, ::avro::NodePtr* node);
55 Status Visit(const StringType& type, ::avro::NodePtr* node);
56 Status Visit(const UuidType& type, ::avro::NodePtr* node);
57 Status Visit(const FixedType& type, ::avro::NodePtr* node);
58 Status Visit(const BinaryType& type, ::avro::NodePtr* node);
// Container types: struct, list and map.
59 Status Visit(const StructType& type, ::avro::NodePtr* node);
60 Status Visit(const ListType& type, ::avro::NodePtr* node);
61 Status Visit(const MapType& type, ::avro::NodePtr* node);
// A schema field, i.e. a type combined with a name.
62 Status Visit(const SchemaField& field, ::avro::NodePtr* node);
63
64 private:
65 // Store recently accessed field ids on the current visitor path.
66 std::stack<int32_t> field_ids_;
67};
68
71 public:
/// Creates a visitor with both field counters at zero (see the in-class
/// initializers of total_fields_ and fields_with_id_).
72 HasIdVisitor() = default;
73
/// Visit an Avro node to check for field IDs.
78 Status Visit(const ::avro::NodePtr& node);
79
/// Overload taking an ::avro::ValidSchema.
/// NOTE(review): presumably forwards to the NodePtr overload with the schema's
/// root node — confirm in the implementation file.
84 Status Visit(const ::avro::ValidSchema& schema);
85
/// Overload taking an ::avro::Schema.
/// NOTE(review): presumably forwards to the NodePtr overload with the schema's
/// root node — confirm in the implementation file.
90 Status Visit(const ::avro::Schema& node);
91
94 bool AllHaveIds() const {
95 return total_fields_ == fields_with_id_ && fields_with_id_ != 0;
96 }
97
100 bool HasNoIds() const { return total_fields_ == 0; }
101
102 private:
/// Private helpers, one per Avro node kind named in the function (record,
/// array, map, union). Each takes an Avro node and returns a Status.
///
/// NOTE(review): presumably dispatched from Visit(const ::avro::NodePtr&)
/// according to the node's type — confirm in the implementation file.
106 Status VisitRecord(const ::avro::NodePtr& node);
107
111 Status VisitArray(const ::avro::NodePtr& node);
112
116 Status VisitMap(const ::avro::NodePtr& node);
117
121 Status VisitUnion(const ::avro::NodePtr& node);
122
123 private:
124 // Total number of fields visited.
125 size_t total_fields_ = 0;
126 // Number of fields with IDs.
127 size_t fields_with_id_ = 0;
128};
129
/// Produces a SchemaProjection from an expected Iceberg schema and an Avro node.
///
/// Returns a Result, so failure is reported through the return value.
/// NOTE(review): the exact effect of `prune_source` is not visible in this
/// header — confirm in the implementation file.
140Result<SchemaProjection> Project(const Schema& expected_schema,
141 const ::avro::NodePtr& avro_node, bool prune_source);
142
/// String renderings of an Avro node, an Avro logical type, and an Avro
/// logical type enumerator, respectively.
/// NOTE(review): presumably intended for diagnostics — the output format is
/// not visible in this header.
143std::string ToString(const ::avro::NodePtr& node);
144std::string ToString(const ::avro::LogicalType& logical_type);
145std::string ToString(const ::avro::LogicalType::Type& logical_type);
146
/// Returns whether the given Avro node has the map logical type.
/// NOTE(review): presumably tests for the custom logical type named "map"
/// (see MapLogicalType) — confirm in the implementation file.
150bool HasMapLogicalType(const ::avro::NodePtr& node);
151
/// Returns whether `name` is a valid Avro name.
/// NOTE(review): the rule applied is not visible here; the Avro specification
/// requires names to match [A-Za-z_][A-Za-z0-9_]* — confirm the implementation
/// follows it.
160bool ValidAvroName(std::string_view name);
161
/// Produces an Avro node from `original_node` and a NameMapping.
///
/// Returns a Result, so failure is reported through the return value.
/// NOTE(review): presumably builds a new node carrying field IDs looked up by
/// name in `mapping`, leaving `original_node` untouched — confirm in the
/// implementation file.
166Result<::avro::NodePtr> MakeAvroNodeWithFieldIds(const ::avro::NodePtr& original_node,
167 const NameMapping& mapping);
168
/// Returns a sanitized copy of `field_name`.
/// NOTE(review): presumably rewrites characters so the result satisfies
/// ValidAvroName() — the replacement scheme is not visible in this header.
190std::string SanitizeFieldName(std::string_view field_name);
191
192} // namespace iceberg::avro
A data type representing an arbitrary-length byte sequence.
Definition type.h:400
A data type representing a boolean (true or false).
Definition type.h:216
A data type representing a calendar date without reference to a timezone or time.
Definition type.h:321
A data type representing a fixed-precision decimal.
Definition type.h:293
A data type representing a 64-bit (double precision) IEEE-754 float.
Definition type.h:278
A data type representing a fixed-length bytestring.
Definition type.h:431
A data type representing a 32-bit (single precision) IEEE-754 float.
Definition type.h:262
A data type representing a 32-bit signed integer.
Definition type.h:231
A data type representing a list of values.
Definition type.h:145
A data type representing a 64-bit signed integer.
Definition type.h:246
A data type representing a dictionary of values.
Definition type.h:177
Represents a mapping from external schema names to Iceberg type IDs.
Definition name_mapping.h:96
A type combined with a name.
Definition schema_field.h:39
A schema for a Table.
Definition schema.h:49
A data type representing an arbitrary-length character sequence (encoded in UTF-8).
Definition type.h:416
A data type representing a struct with nested fields.
Definition type.h:108
A data type representing a wall clock time in microseconds without reference to a timezone or date.
Definition type.h:337
A data type representing a timestamp in microseconds without reference to a timezone.
Definition type.h:363
A data type representing a timestamp as microseconds since the epoch in UTC. A time zone or offset is...
Definition type.h:382
A data type representing a UUID. While defined as a distinct type, it is effectively a fixed(16).
Definition type.h:454
A visitor that checks the presence of field IDs in an Avro schema.
Definition avro_schema_util_internal.h:70
Status Visit(const ::avro::NodePtr &node)
Visit an Avro node to check for field IDs.
Definition avro_schema_util.cc:346
bool AllHaveIds() const
Check if all fields in the visited schema have field IDs.
Definition avro_schema_util_internal.h:94
bool HasNoIds() const
Check if no fields in the visited schema have field IDs.
Definition avro_schema_util_internal.h:100
A visitor that converts an Iceberg type to an Avro node.
Definition avro_schema_util_internal.h:43
Result< R > Visit(const std::shared_ptr< Expression > &expr, V &visitor)
Traverse an expression tree with a visitor.
Definition expression_visitor.h:283
Definition avro_schema_util_internal.h:38