iceberg-cpp
Loading...
Searching...
No Matches
table_properties.h
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
#include <cstdint>
#include <limits>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>

#include "iceberg/iceberg_export.h"
#include "iceberg/util/config.h"
29
30namespace iceberg {
31
37class ICEBERG_EXPORT TableProperties : public ConfigBase<TableProperties> {
38 public:
39 template <typename T>
40 using Entry = const ConfigBase<TableProperties>::Entry<T>;
41
42 // Reserved table properties
43
55 inline static Entry<std::string> kFormatVersion{"format-version", ""};
57 inline static Entry<std::string> kUuid{"uuid", ""};
59 inline static Entry<std::string> kSnapshotCount{"snapshot-count", ""};
61 inline static Entry<std::string> kCurrentSnapshotSummary{"current-snapshot-summary",
62 ""};
64 inline static Entry<std::string> kCurrentSnapshotId{"current-snapshot-id", ""};
66 inline static Entry<std::string> kCurrentSnapshotTimestamp{
67 "current-snapshot-timestamp-ms", ""};
69 inline static Entry<std::string> kCurrentSchema{"current-schema", ""};
72 inline static Entry<std::string> kDefaultPartitionSpec{"default-partition-spec", ""};
75 inline static Entry<std::string> kDefaultSortOrder{"default-sort-order", ""};
76
77 // Commit properties
78
79 inline static Entry<int32_t> kCommitNumRetries{"commit.retry.num-retries", 4};
80 inline static Entry<int32_t> kCommitMinRetryWaitMs{"commit.retry.min-wait-ms", 100};
81 inline static Entry<int32_t> kCommitMaxRetryWaitMs{"commit.retry.max-wait-ms",
82 60 * 1000}; // 1 minute
83 inline static Entry<int32_t> kCommitTotalRetryTimeMs{"commit.retry.total-timeout-ms",
84 30 * 60 * 1000}; // 30 minutes
85 inline static Entry<int32_t> kCommitNumStatusChecks{"commit.status-check.num-retries",
86 3};
87 inline static Entry<int64_t> kCommitStatusChecksMinWaitMs{
88 "commit.status-check.min-wait-ms", int64_t{1000}}; // 1 second
89 inline static Entry<int64_t> kCommitStatusChecksMaxWaitMs{
90 "commit.status-check.max-wait-ms", int64_t{60 * 1000}}; // 1 minute
91 inline static Entry<int64_t> kCommitStatusChecksTotalWaitMs{
92 "commit.status-check.total-timeout-ms", int64_t{30 * 60 * 1000}}; // 30 minutes
93
94 // Manifest properties
95
96 inline static Entry<int64_t> kManifestTargetSizeBytes{
97 "commit.manifest.target-size-bytes", int64_t{8 * 1024 * 1024}}; // 8 MB
98 inline static Entry<int32_t> kManifestMinMergeCount{
99 "commit.manifest.min-count-to-merge", 100};
100 inline static Entry<bool> kManifestMergeEnabled{"commit.manifest-merge.enabled", true};
101
102 // File format properties
103
104 inline static Entry<std::string> kDefaultFileFormat{"write.format.default", "parquet"};
105 inline static Entry<std::string> kDeleteDefaultFileFormat{"write.delete.format.default",
106 "parquet"};
107
108 // Parquet properties
109
110 inline static Entry<int32_t> kParquetRowGroupSizeBytes{
111 "write.parquet.row-group-size-bytes", 128 * 1024 * 1024}; // 128 MB
112 inline static Entry<int32_t> kDeleteParquetRowGroupSizeBytes{
113 "write.delete.parquet.row-group-size-bytes", 128 * 1024 * 1024}; // 128 MB
114 inline static Entry<int32_t> kParquetPageSizeBytes{"write.parquet.page-size-bytes",
115 1024 * 1024}; // 1 MB
116 inline static Entry<int32_t> kDeleteParquetPageSizeBytes{
117 "write.delete.parquet.page-size-bytes", 1024 * 1024}; // 1 MB
118 inline static Entry<int32_t> kParquetPageRowLimit{"write.parquet.page-row-limit",
119 20'000};
120 inline static Entry<int32_t> kDeleteParquetPageRowLimit{
121 "write.delete.parquet.page-row-limit", 20'000};
122 inline static Entry<int32_t> kParquetDictSizeBytes{"write.parquet.dict-size-bytes",
123 2 * 1024 * 1024}; // 2 MB
124 inline static Entry<int32_t> kDeleteParquetDictSizeBytes{
125 "write.delete.parquet.dict-size-bytes", 2 * 1024 * 1024}; // 2 MB
126 inline static Entry<std::string> kParquetCompression{"write.parquet.compression-codec",
127 "zstd"};
128 inline static Entry<std::string> kDeleteParquetCompression{
129 "write.delete.parquet.compression-codec", "zstd"};
130 inline static Entry<std::string> kParquetCompressionLevel{
131 "write.parquet.compression-level", ""};
132 inline static Entry<std::string> kDeleteParquetCompressionLevel{
133 "write.delete.parquet.compression-level", ""};
134 inline static Entry<int32_t> kParquetRowGroupCheckMinRecordCount{
135 "write.parquet.row-group-check-min-record-count", 100};
136 inline static Entry<int32_t> kDeleteParquetRowGroupCheckMinRecordCount{
137 "write.delete.parquet.row-group-check-min-record-count", 100};
138 inline static Entry<int32_t> kParquetRowGroupCheckMaxRecordCount{
139 "write.parquet.row-group-check-max-record-count", 10'000};
140 inline static Entry<int32_t> kDeleteParquetRowGroupCheckMaxRecordCount{
141 "write.delete.parquet.row-group-check-max-record-count", 10'000};
142 inline static Entry<int32_t> kParquetBloomFilterMaxBytes{
143 "write.parquet.bloom-filter-max-bytes", 1024 * 1024}; // 1 MB
144 inline static std::string_view kParquetBloomFilterColumnFppPrefix{
145 "write.parquet.bloom-filter-fpp.column."};
146 inline static std::string_view kParquetBloomFilterColumnEnabledPrefix{
147 "write.parquet.bloom-filter-enabled.column."};
148 inline static std::string_view kParquetColumnStatsEnabledPrefix{
149 "write.parquet.stats-enabled.column."};
150
151 // Avro properties
152 inline static Entry<std::string> kAvroCompression{"write.avro.compression-codec",
153 "gzip"};
154 inline static Entry<std::string> kDeleteAvroCompression{
155 "write.delete.avro.compression-codec", "gzip"};
156 inline static Entry<std::string> kAvroCompressionLevel{"write.avro.compression-level",
157 ""};
158 inline static Entry<std::string> kDeleteAvroCompressionLevel{
159 "write.delete.avro.compression-level", ""};
160
161 // ORC properties
162 inline static Entry<int64_t> kOrcStripeSizeBytes{"write.orc.stripe-size-bytes",
163 int64_t{64} * 1024 * 1024};
164 inline static Entry<std::string> kOrcBloomFilterColumns{
165 "write.orc.bloom.filter.columns", ""};
166 inline static Entry<double> kOrcBloomFilterFpp{"write.orc.bloom.filter.fpp", 0.05};
167 inline static Entry<int64_t> kDeleteOrcStripeSizeBytes{
168 "write.delete.orc.stripe-size-bytes", int64_t{64} * 1024 * 1024}; // 64 MB
169 inline static Entry<int64_t> kOrcBlockSizeBytes{"write.orc.block-size-bytes",
170 int64_t{256} * 1024 * 1024}; // 256 MB
171 inline static Entry<int64_t> kDeleteOrcBlockSizeBytes{
172 "write.delete.orc.block-size-bytes", int64_t{256} * 1024 * 1024}; // 256 MB
173 inline static Entry<int32_t> kOrcWriteBatchSize{"write.orc.vectorized.batch-size",
174 1024};
175 inline static Entry<int32_t> kDeleteOrcWriteBatchSize{
176 "write.delete.orc.vectorized.batch-size", 1024};
177 inline static Entry<std::string> kOrcCompression{"write.orc.compression-codec", "zlib"};
178 inline static Entry<std::string> kDeleteOrcCompression{
179 "write.delete.orc.compression-codec", "zlib"};
180 inline static Entry<std::string> kOrcCompressionStrategy{
181 "write.orc.compression-strategy", "speed"};
182 inline static Entry<std::string> kDeleteOrcCompressionStrategy{
183 "write.delete.orc.compression-strategy", "speed"};
184
185 // Read properties
186
187 inline static Entry<int64_t> kSplitSize{"read.split.target-size",
188 int64_t{128} * 1024 * 1024}; // 128 MB
189 inline static Entry<int64_t> kMetadataSplitSize{"read.split.metadata-target-size",
190 int64_t{32} * 1024 * 1024}; // 32 MB
191 inline static Entry<int32_t> kSplitLookback{"read.split.planning-lookback", 10};
192 inline static Entry<int64_t> kSplitOpenFileCost{"read.split.open-file-cost",
193 int64_t{4} * 1024 * 1024}; // 4 MB
194 inline static Entry<bool> kAdaptiveSplitSizeEnabled{"read.split.adaptive-size.enabled",
195 true};
196 inline static Entry<bool> kParquetVectorizationEnabled{
197 "read.parquet.vectorization.enabled", true};
198 inline static Entry<int32_t> kParquetBatchSize{"read.parquet.vectorization.batch-size",
199 5000};
200 inline static Entry<bool> kOrcVectorizationEnabled{"read.orc.vectorization.enabled",
201 false};
202 inline static Entry<int32_t> kOrcBatchSize{"read.orc.vectorization.batch-size", 5000};
203 inline static Entry<std::string> kDataPlanningMode{"read.data-planning-mode", "auto"};
204 inline static Entry<std::string> kDeletePlanningMode{"read.delete-planning-mode",
205 "auto"};
206
207 // Write properties
208
209 inline static Entry<bool> kObjectStoreEnabled{"write.object-storage.enabled", false};
212 inline static Entry<bool> kWriteObjectStorePartitionedPaths{
213 "write.object-storage.partitioned-paths", true};
217 inline static Entry<std::string> kWriteDataLocation{"write.data.path", ""};
221 inline static Entry<std::string> kWriteMetadataLocation{"write.metadata.path", ""};
222 inline static Entry<int32_t> kWritePartitionSummaryLimit{
223 "write.summary.partition-limit", 0};
224 inline static Entry<std::string> kMetadataCompression{
225 "write.metadata.compression-codec", "none"};
226 inline static Entry<int32_t> kMetadataPreviousVersionsMax{
227 "write.metadata.previous-versions-max", 100};
229 inline static Entry<bool> kMetadataDeleteAfterCommitEnabled{
230 "write.metadata.delete-after-commit.enabled", false};
231 inline static Entry<int32_t> kMetricsMaxInferredColumnDefaults{
232 "write.metadata.metrics.max-inferred-column-defaults", 100};
233 inline static constexpr std::string_view kMetricModeColumnConfPrefix =
234 "write.metadata.metrics.column.";
235 inline static Entry<std::string> kDefaultWriteMetricsMode{
236 "write.metadata.metrics.default", "truncate(16)"};
237
238 inline static std::string_view kDefaultNameMapping{"schema.name-mapping.default"};
239
240 inline static Entry<bool> kWriteAuditPublishEnabled{"write.wap.enabled", false};
241 inline static Entry<int64_t> kWriteTargetFileSizeBytes{
242 "write.target-file-size-bytes", int64_t{512} * 1024 * 1024}; // 512 MB
243 inline static Entry<int64_t> kDeleteTargetFileSizeBytes{
244 "write.delete.target-file-size-bytes", int64_t{64} * 1024 * 1024}; // 64 MB
245
246 inline static Entry<bool> kSnapshotIdInheritanceEnabled{
247 "compatibility.snapshot-id-inheritance.enabled", false};
248
249 // Garbage collection properties
250
251 inline static Entry<bool> kGcEnabled{"gc.enabled", true};
252 inline static Entry<int64_t> kMaxSnapshotAgeMs{
253 "history.expire.max-snapshot-age-ms", int64_t{5} * 24 * 60 * 60 * 1000}; // 5 days
254 inline static Entry<int32_t> kMinSnapshotsToKeep{"history.expire.min-snapshots-to-keep",
255 1};
256 inline static Entry<int64_t> kMaxRefAgeMs{"history.expire.max-ref-age-ms",
257 (std::numeric_limits<int64_t>::max)()};
258
259 // Delete/Update/Merge properties
260
261 inline static Entry<std::string> kDeleteGranularity{"write.delete.granularity",
262 "partition"};
263 inline static Entry<std::string> kDeleteIsolationLevel{"write.delete.isolation-level",
264 "serializable"};
265 inline static Entry<std::string> kDeleteMode{"write.delete.mode", "copy-on-write"};
266
267 inline static Entry<std::string> kUpdateIsolationLevel{"write.update.isolation-level",
268 "serializable"};
269 inline static Entry<std::string> kUpdateMode{"write.update.mode", "copy-on-write"};
270
271 inline static Entry<std::string> kMergeIsolationLevel{"write.merge.isolation-level",
272 "serializable"};
273 inline static Entry<std::string> kMergeMode{"write.merge.mode", "copy-on-write"};
274
275 inline static Entry<bool> kUpsertEnabled{"write.upsert.enabled", false};
276
277 // Encryption properties
278
279 inline static Entry<std::string> kEncryptionTableKey{"encryption.key-id", ""};
280 inline static Entry<int32_t> kEncryptionDekLength{"encryption.data-key-length", 16};
281
289 static const std::unordered_set<std::string>& reserved_properties();
290
292 static const std::unordered_set<std::string>& commit_properties();
293
298 static TableProperties FromMap(std::unordered_map<std::string, std::string> properties);
299
300 bool operator==(const TableProperties& other) const {
301 return configs_ == other.configs_;
302 }
303};
304
305} // namespace iceberg
Definition config.h:70
Definition config.h:67
Table properties for Iceberg tables.
Definition table_properties.h:37