iceberg-cpp
Loading...
Searching...
No Matches
aggregate.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <memory>
26#include <span>
27#include <string>
28#include <vector>
29
32#include "iceberg/result.h"
33#include "iceberg/type_fwd.h"
34
35namespace iceberg {
36
/// \brief Base aggregate holding an operation and a term.
///
/// \tparam T Term type (constrained by TermType); the term is stored as
/// std::shared_ptr<T>.
38template <TermType T>
39class ICEBERG_EXPORT Aggregate : public virtual Expression {
40 public:
41 ~Aggregate() override = default;
42
   /// \brief Returns the operation this aggregate was constructed with.
43 Expression::Operation op() const override { return operation_; }
44
   /// \brief Returns the stored term by const reference.
   ///
   /// The pointer may be null; the derived classes in this header null-check
   /// it before dereferencing.
45 const std::shared_ptr<T>& term() const { return term_; }
46
   /// \brief String form of this aggregate. Declared here; defined elsewhere.
47 std::string ToString() const override;
48
49 protected:
   /// \brief Stores `op` and moves `term` into the member.
   ///
   /// Performs no validation itself. Protected, so only derived classes can
   /// construct this base.
50 Aggregate(Expression::Operation op, std::shared_ptr<T> term)
51 : operation_(op), term_(std::move(term)) {}
52
   /// \brief Returns true only for kCount, kCountNull, kCountStar, kMax and kMin.
53 static constexpr bool IsSupportedOp(Expression::Operation op) {
54 return op == Expression::Operation::kCount ||
55 op == Expression::Operation::kCountNull ||
56 op == Expression::Operation::kCountStar || op == Expression::Operation::kMax ||
57 op == Expression::Operation::kMin;
58 }
59
   // Operation and term captured at construction; never reassigned in this header.
60 Expression::Operation operation_;
61 std::shared_ptr<T> term_;
62};
63
/// \brief Base class for unbound aggregates.
///
/// Derives virtually from Expression and also implements the
/// Unbound<Expression> interface.
65class ICEBERG_EXPORT UnboundAggregate : public virtual Expression,
66 public Unbound<Expression> {
67 public:
68 ~UnboundAggregate() override = default;
69
   /// \brief Always returns true for this class.
70 bool is_unbound_aggregate() const override { return true; }
71};
72
/// \brief Template for unbound aggregates that carry a term and operation.
///
/// \tparam B Parameter of the UnboundTerm<B> held by this aggregate
/// (presumably the bound type the term resolves to -- confirm against term.h).
74template <typename B>
75class ICEBERG_EXPORT UnboundAggregateImpl : public UnboundAggregate,
76 public Aggregate<UnboundTerm<B>> {
   // Shorthand for the Aggregate base; used below to qualify inherited members.
77 using BASE = Aggregate<UnboundTerm<B>>;
78
79 public:
   /// \brief Factory for unbound aggregates; the constructor is private.
   ///
   /// Declared here and defined elsewhere, so the error conditions reported
   /// through Result are not visible in this header.
80 static Result<std::shared_ptr<UnboundAggregateImpl<B>>> Make(
81 Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term);
82
   /// \brief Returns the underlying named reference for this unbound term.
   ///
   /// Returns nullptr when no term is set (allowed only for COUNT(*), per the
   /// constructor check below).
83 std::shared_ptr<NamedReference> reference() override {
84 return BASE::term() ? BASE::term()->reference() : nullptr;
85 }
86
   /// \brief Binds this aggregate against `schema`. Declared here; defined elsewhere.
   ///
   /// \param schema Schema to bind against.
   /// \param case_sensitive Presumably controls case sensitivity of name
   /// lookup -- confirm against the definition.
87 Result<std::shared_ptr<Expression>> Bind(const Schema& schema,
88 bool case_sensitive) const override;
89
90 private:
   /// \brief Forwards `op` and `term` to the Aggregate base.
   ///
   /// The two ICEBERG_DCHECKs assert that `op` passes IsSupportedOp and that
   /// the term is non-null unless `op` is kCountStar.
91 UnboundAggregateImpl(Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term)
92 : BASE(op, std::move(term)) {
93 ICEBERG_DCHECK(BASE::IsSupportedOp(op), "Unexpected aggregate operation");
94 ICEBERG_DCHECK(op == Expression::Operation::kCountStar || BASE::term() != nullptr,
95 "Aggregate term cannot be null except for COUNT(*)");
96 }
97};
98
/// \brief Base class for bound aggregates.
100class ICEBERG_EXPORT BoundAggregate : public Aggregate<BoundTerm>, public Bound {
101 public:
   // Re-export op() and term() from the Aggregate base (presumably to avoid
   // ambiguity with names reachable through Bound -- confirm against expression.h).
102 using Aggregate<BoundTerm>::op;
103 using Aggregate<BoundTerm>::term;
104
   /// \brief Base class for aggregators.
   // NOTE(review): this listing skips source lines 105-106, where the nested
   // `Aggregator` class is presumably opened -- confirm against the original header.
107 public:
108 virtual ~Aggregator() = default;
109
   // Presumably folds one row into the aggregation state -- confirm against implementations.
110 virtual Status Update(const StructLike& data) = 0;
111
   // Presumably folds one data file's metrics into the aggregation state -- confirm.
112 virtual Status Update(const DataFile& file) = 0;
113
   /// \brief Whether the aggregator is still valid.
115 virtual bool IsValid() const = 0;
116
   /// \brief Get the result of the aggregation.
121 virtual Literal GetResult() const = 0;
122 };
123
   /// \brief Returns the underlying bound reference for this term.
   ///
   /// Returns nullptr when there is no term. The ICEBERG_DCHECK asserts that a
   /// null term occurs only for kCountStar.
124 std::shared_ptr<BoundReference> reference() override {
125 ICEBERG_DCHECK(term() != nullptr || op() == Expression::Operation::kCountStar,
126 "Bound aggregate term should not be null except for COUNT(*)");
127 return term() ? term()->reference() : nullptr;
128 }
129
   /// \brief Evaluate this expression against row-based data.
   // Re-declared pure so concrete aggregates must implement it.
130 Result<Literal> Evaluate(const StructLike& data) const override = 0;
131
   // Data-file overload; presumably evaluates from file metrics (see HasValue) -- confirm.
132 virtual Result<Literal> Evaluate(const DataFile& file) const = 0;
133
   /// \brief Whether metrics in the data file are sufficient to evaluate.
135 virtual bool HasValue(const DataFile& file) const = 0;
136
   /// \brief Always returns true for this class.
137 bool is_bound_aggregate() const override { return true; }
138
   /// \brief Create a new aggregator for this aggregate.
141 virtual std::unique_ptr<Aggregator> NewAggregator() const = 0;
142
143 protected:
   /// \brief Forwards `op` and `term` to the Aggregate<BoundTerm> base.
144 BoundAggregate(Expression::Operation op, std::shared_ptr<BoundTerm> term)
145 : Aggregate<BoundTerm>(op, std::move(term)) {}
146};
147
/// \brief Base class for COUNT aggregates.
///
/// Overrides both Evaluate overloads and NewAggregator (defined elsewhere) and
/// adds the pure virtual CountFor hooks for subclasses.
149class ICEBERG_EXPORT CountAggregate : public BoundAggregate {
150 public:
   // Both overloads are defined elsewhere; presumably they delegate to the
   // matching CountFor overload -- confirm against the definition.
151 Result<Literal> Evaluate(const StructLike& data) const override;
152 Result<Literal> Evaluate(const DataFile& file) const override;
153
154 std::unique_ptr<Aggregator> NewAggregator() const override;
155
   /// \brief Count for a single row. Subclasses implement this.
157 virtual Result<int64_t> CountFor(const StructLike& data) const = 0;
   /// \brief Count using metrics from a data file.
159 virtual Result<int64_t> CountFor(const DataFile& file) const = 0;
160
161 protected:
   /// \brief Forwards `op` and `term` to BoundAggregate.
162 CountAggregate(Expression::Operation op, std::shared_ptr<BoundTerm> term)
163 : BoundAggregate(op, std::move(term)) {}
164};
165
/// \brief COUNT(term) aggregate.
167class ICEBERG_EXPORT CountNonNullAggregate : public CountAggregate {
168 public:
   /// \brief Factory; the constructor is private. Defined elsewhere, so the
   /// conditions under which the Result holds an error are not visible here.
169 static Result<std::unique_ptr<CountNonNullAggregate>> Make(
170 std::shared_ptr<BoundTerm> term);
171
   // Row and data-file overloads of the CountAggregate hook, plus the
   // BoundAggregate metrics check; all defined elsewhere.
172 Result<int64_t> CountFor(const StructLike& data) const override;
173 Result<int64_t> CountFor(const DataFile& file) const override;
174 bool HasValue(const DataFile& file) const override;
175
176 private:
   // Explicit single-argument constructor; instances are created through Make().
177 explicit CountNonNullAggregate(std::shared_ptr<BoundTerm> term);
178};
179
/// \brief COUNT_NULL(term) aggregate.
181class ICEBERG_EXPORT CountNullAggregate : public CountAggregate {
182 public:
   /// \brief Factory; the constructor is private. Defined elsewhere, so the
   /// conditions under which the Result holds an error are not visible here.
183 static Result<std::unique_ptr<CountNullAggregate>> Make(
184 std::shared_ptr<BoundTerm> term);
185
   // Row and data-file overloads of the CountAggregate hook, plus the
   // BoundAggregate metrics check; all defined elsewhere.
186 Result<int64_t> CountFor(const StructLike& data) const override;
187 Result<int64_t> CountFor(const DataFile& file) const override;
188 bool HasValue(const DataFile& file) const override;
189
190 private:
   // Explicit single-argument constructor; instances are created through Make().
191 explicit CountNullAggregate(std::shared_ptr<BoundTerm> term);
192};
193
/// \brief COUNT(*) aggregate.
195class ICEBERG_EXPORT CountStarAggregate : public CountAggregate {
196 public:
   /// \brief Factory taking no term. Defined elsewhere.
197 static Result<std::unique_ptr<CountStarAggregate>> Make();
198
   // Row and data-file overloads of the CountAggregate hook, plus the
   // BoundAggregate metrics check; all defined elsewhere.
199 Result<int64_t> CountFor(const StructLike& data) const override;
200 Result<int64_t> CountFor(const DataFile& file) const override;
201 bool HasValue(const DataFile& file) const override;
202
203 private:
   // NOTE(review): this listing skips source line 204; by analogy with the
   // sibling classes it presumably declares the private constructor -- confirm
   // against the original header.
205};
206
/// \brief Bound MAX aggregate.
208class ICEBERG_EXPORT MaxAggregate : public BoundAggregate {
209 public:
   /// \brief Factory; the constructor is private. Defined elsewhere, so the
   /// conditions under which the Result holds an error are not visible here.
210 static Result<std::unique_ptr<MaxAggregate>> Make(std::shared_ptr<BoundTerm> term);
211
   // Row and data-file evaluation plus the metrics check required by
   // BoundAggregate; all defined elsewhere.
212 Result<Literal> Evaluate(const StructLike& data) const override;
213 Result<Literal> Evaluate(const DataFile& file) const override;
214 bool HasValue(const DataFile& file) const override;
215
   // Returns the aggregator for this aggregate; defined elsewhere.
216 std::unique_ptr<Aggregator> NewAggregator() const override;
217
218 private:
   // Explicit single-argument constructor; instances are created through Make().
219 explicit MaxAggregate(std::shared_ptr<BoundTerm> term);
220};
221
/// \brief Bound MIN aggregate.
223class ICEBERG_EXPORT MinAggregate : public BoundAggregate {
224 public:
   /// \brief Factory; the constructor is private. Defined elsewhere, so the
   /// conditions under which the Result holds an error are not visible here.
225 static Result<std::unique_ptr<MinAggregate>> Make(std::shared_ptr<BoundTerm> term);
226
   // Row and data-file evaluation plus the metrics check required by
   // BoundAggregate; all defined elsewhere.
227 Result<Literal> Evaluate(const StructLike& data) const override;
228 Result<Literal> Evaluate(const DataFile& file) const override;
229 bool HasValue(const DataFile& file) const override;
230
   // Returns the aggregator for this aggregate; defined elsewhere.
231 std::unique_ptr<Aggregator> NewAggregator() const override;
232
233 private:
   // Explicit single-argument constructor; instances are created through Make().
234 explicit MinAggregate(std::shared_ptr<BoundTerm> term);
235};
236
/// \brief Evaluates bound aggregates over StructLike data.
///
/// Pure interface: instances are obtained through the static Make() factories.
238class ICEBERG_EXPORT AggregateEvaluator {
239 public:
240 virtual ~AggregateEvaluator() = default;
241
   /// \brief Creates an evaluator for a single bound aggregate. Defined elsewhere.
244 static Result<std::unique_ptr<AggregateEvaluator>> Make(
245 std::shared_ptr<BoundAggregate> aggregate);
246
   /// \brief Creates an evaluator for a list of bound aggregates. Defined elsewhere.
250 static Result<std::unique_ptr<AggregateEvaluator>> Make(
251 std::vector<std::shared_ptr<BoundAggregate>> aggregates);
252
   /// \brief Update aggregates with a row.
254 virtual Status Update(const StructLike& data) = 0;
255
   /// \brief Update aggregates using data file metrics.
257 virtual Status Update(const DataFile& file) = 0;
258
   /// \brief Final aggregated values, returned as a span of literals.
   // NOTE(review): the span is non-owning; its backing storage presumably
   // belongs to the evaluator -- confirm lifetime against the implementation.
260 virtual Result<std::span<const Literal>> GetResults() const = 0;
261
   /// \brief Convenience accessor when only one aggregate is evaluated.
263 virtual Result<Literal> GetResult() const = 0;
264
   /// \brief Whether all aggregators are still valid (metrics present).
266 virtual bool AllAggregatorsValid() const = 0;
267};
268
269} // namespace iceberg
Evaluates bound aggregates over StructLike data.
Definition aggregate.h:238
virtual Status Update(const DataFile &file)=0
Update aggregates using data file metrics.
virtual Result< Literal > GetResult() const =0
Convenience accessor when only one aggregate is evaluated.
virtual Status Update(const StructLike &data)=0
Update aggregates with a row.
virtual Result< std::span< const Literal > > GetResults() const =0
Final aggregated values.
virtual bool AllAggregatorsValid() const =0
Whether all aggregators are still valid (metrics present).
Base aggregate holding an operation and a term.
Definition aggregate.h:39
Expression::Operation op() const override
Returns the operation for an expression node.
Definition aggregate.h:43
Base class for aggregators.
Definition aggregate.h:106
virtual bool IsValid() const =0
Whether the aggregator is still valid.
virtual Literal GetResult() const =0
Get the result of the aggregation.
Base class for bound aggregates.
Definition aggregate.h:100
virtual std::unique_ptr< Aggregator > NewAggregator() const =0
Create a new aggregator for this aggregate.
virtual bool HasValue(const DataFile &file) const =0
Whether metrics in the data file are sufficient to evaluate.
std::shared_ptr< BoundReference > reference() override
Returns the underlying bound reference for this term.
Definition aggregate.h:124
Result< Literal > Evaluate(const StructLike &data) const override=0
Evaluate this expression against row-based data.
Base class for bound terms.
Definition term.h:64
Interface for bound expressions that can be evaluated.
Definition expression.h:360
Base class for COUNT aggregates.
Definition aggregate.h:149
virtual Result< int64_t > CountFor(const StructLike &data) const =0
Count for a single row. Subclasses implement this.
virtual Result< int64_t > CountFor(const DataFile &file) const =0
Count using metrics from a data file.
COUNT(term) aggregate.
Definition aggregate.h:167
COUNT_NULL(term) aggregate.
Definition aggregate.h:181
COUNT(*) aggregate.
Definition aggregate.h:195
Represents a boolean expression tree.
Definition expression.h:37
Operation
Operation types for expressions.
Definition expression.h:40
Literal is a literal value that is associated with a primitive type.
Definition literal.h:39
Bound MAX aggregate.
Definition aggregate.h:208
Bound MIN aggregate.
Definition aggregate.h:223
A schema for a Table.
Definition schema.h:49
An immutable struct-like wrapper.
Definition struct_like.h:62
Template for unbound aggregates that carry a term and operation.
Definition aggregate.h:76
std::shared_ptr< NamedReference > reference() override
Returns the underlying named reference for this unbound term.
Definition aggregate.h:83
Base class for unbound aggregates.
Definition aggregate.h:66
Interface for unbound expressions that need schema binding.
Definition expression.h:339
STL namespace.
DataFile carries data file path, partition tuple, metrics, ...
Definition manifest_entry.h:62