iceberg-cpp
Loading...
Searching...
No Matches
data_file_set.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
25
26#include <iterator>
27#include <memory>
28#include <span>
29#include <string_view>
30#include <unordered_map>
31#include <vector>
32
33#include "iceberg/iceberg_export.h"
35#include "iceberg/util/string_util.h"
36
37namespace iceberg {
38
41class ICEBERG_EXPORT DataFileSet {
42 public:
43 using value_type = std::shared_ptr<DataFile>;
44 using iterator = typename std::vector<value_type>::iterator;
45 using const_iterator = typename std::vector<value_type>::const_iterator;
46 using difference_type = typename std::vector<value_type>::difference_type;
47
48 DataFileSet() = default;
49
54 std::pair<iterator, bool> insert(const value_type& file) { return InsertImpl(file); }
55
57 std::pair<iterator, bool> insert(value_type&& file) {
58 return InsertImpl(std::move(file));
59 }
60
62 size_t size() const { return elements_.size(); }
63
65 bool empty() const { return elements_.empty(); }
66
68 void clear() {
69 elements_.clear();
70 index_by_path_.clear();
71 }
72
74 iterator begin() { return elements_.begin(); }
75 const_iterator begin() const { return elements_.begin(); }
76 const_iterator cbegin() const { return elements_.cbegin(); }
77
79 iterator end() { return elements_.end(); }
80 const_iterator end() const { return elements_.end(); }
81 const_iterator cend() const { return elements_.cend(); }
82
84 std::span<const value_type> as_span() const { return elements_; }
85
86 private:
87 std::pair<iterator, bool> InsertImpl(value_type file) {
88 if (!file) {
89 return {elements_.end(), false};
90 }
91
92 auto [index_iter, inserted] =
93 index_by_path_.try_emplace(file->file_path, elements_.size());
94 if (!inserted) {
95 auto pos = static_cast<difference_type>(index_iter->second);
96 return {elements_.begin() + pos, false};
97 }
98
99 elements_.push_back(std::move(file));
100 return {std::prev(elements_.end()), true};
101 }
102
103 // Vector to preserve insertion order
104 std::vector<value_type> elements_;
105 std::unordered_map<std::string_view, size_t, StringHash, StringEqual> index_by_path_;
106};
107
108} // namespace iceberg
A set of DataFile pointers with insertion order preserved and deduplicated by file path.
Definition data_file_set.h:41
iterator end()
Get iterator to the end.
Definition data_file_set.h:79
void clear()
Clear all elements from the set.
Definition data_file_set.h:68
std::span< const value_type > as_span() const
Get a non-owning view of the data files in insertion order.
Definition data_file_set.h:84
size_t size() const
Get the number of elements in the set.
Definition data_file_set.h:62
std::pair< iterator, bool > insert(const value_type &file)
Insert a data file into the set.
Definition data_file_set.h:54
iterator begin()
Get iterator to the beginning.
Definition data_file_set.h:74
bool empty() const
Check if the set is empty.
Definition data_file_set.h:65
std::pair< iterator, bool > insert(value_type &&file)
Insert a data file into the set (move version).
Definition data_file_set.h:57