iceberg-cpp
Loading...
Searching...
No Matches
file_writer.h
Go to the documentation of this file.
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#pragma once
21
24
25#include <functional>
26#include <memory>
27#include <optional>
28#include <vector>
29
31#include "iceberg/file_format.h"
32#include "iceberg/metrics.h"
33#include "iceberg/result.h"
34#include "iceberg/type_fwd.h"
35#include "iceberg/util/config.h"
36
37namespace iceberg {
38
/// \brief Typed configuration entries for file writers.
///
/// Derives from ConfigBase<WriterProperties>. Each static entry below is
/// constructed from a property key string and a typed value.
/// NOTE(review): the second constructor argument is presumably the default
/// used when the key is not set -- confirm against ConfigBase::Entry in
/// iceberg/util/config.h.
39class ICEBERG_EXPORT WriterProperties : public ConfigBase<WriterProperties> {
40 public:
    /// \brief Shorthand for a const-qualified ConfigBase<WriterProperties>::Entry<T>.
41 template <typename T>
42 using Entry = const ConfigBase<WriterProperties>::Entry<T>;
43
    /// \brief String entry for key "write.avro.schema-name", constructed with "".
45 inline static Entry<std::string> kAvroSchemaName{"write.avro.schema-name", ""};
    /// \brief Integer entry for key "write.avro.buffer-size", constructed with
    /// 1024 * 1024. NOTE(review): unit is presumably bytes -- confirm.
47 inline static Entry<int64_t> kAvroBufferSize{"write.avro.buffer-size", 1024 * 1024};
    /// \brief Integer entry for key "write.avro.sync-interval", constructed with
    /// 16 * 1024. NOTE(review): unit is presumably bytes -- confirm.
49 inline static Entry<int64_t> kAvroSyncInterval{"write.avro.sync-interval", 16 * 1024};
    /// \brief Boolean entry for key "write.avro.skip-datum", constructed with true.
    /// NOTE(review): the exact effect of this flag is not visible in this header.
52 inline static Entry<bool> kAvroSkipDatum{"write.avro.skip-datum", true};
    /// \brief String entry for key "write.avro.compression-codec", constructed
    /// with "gzip".
53 inline static Entry<std::string> kAvroCompression{"write.avro.compression-codec",
54 "gzip"};
    /// \brief String entry for key "write.avro.compression-level", constructed
    /// with "". NOTE(review): presumably an empty value means "use the codec's
    /// own default level" -- confirm.
55 inline static Entry<std::string> kAvroCompressionLevel{"write.avro.compression-level",
56 ""};
57
    /// \brief String entry for key "write.parquet.compression-codec", constructed
    /// with "zstd".
58 inline static Entry<std::string> kParquetCompression{"write.parquet.compression-codec",
59 "zstd"};
    /// \brief String entry for key "write.parquet.compression-level", constructed
    /// with "". NOTE(review): presumably an empty value means "use the codec's
    /// own default level" -- confirm.
60 inline static Entry<std::string> kParquetCompressionLevel{
61 "write.parquet.compression-level", ""};
62
64
    /// \brief Create a WriterProperties instance from a map of string key/value
    /// pairs.
    ///
    /// \param properties Property names mapped to their string values.
    /// \return A WriterProperties object, returned by value.
    /// NOTE(review): handling of unknown keys and unparsable values is defined
    /// in the implementation, which is not visible here.
66 static WriterProperties FromMap(
67 const std::unordered_map<std::string, std::string>& properties);
68};
69
/// \brief Options for creating a writer.
71struct ICEBERG_EXPORT WriterOptions {
    /// \brief The path to the file to write.
73 std::string path;
    /// \brief The schema of the data to write.
75 std::shared_ptr<Schema> schema;
    /// \brief FileIO instance to create the file.
77 std::shared_ptr<class FileIO> io;
    /// \brief Metadata to write to the file.
79 std::unordered_map<std::string, std::string> metadata;
    /// \brief Format-specific or implementation-specific properties.
    // Restored: this page's member reference lists `WriterProperties properties`
    // as defined at file_writer.h:81, but the listing skipped from 79 to 82.
81 WriterProperties properties;
82};
83
/// \brief Base writer class to write data from different file formats.
///
/// Abstract interface: every operation below is pure virtual. Copy
/// construction and copy assignment are deleted.
85class ICEBERG_EXPORT Writer {
86 public:
87 virtual ~Writer() = default;
88 Writer() = default;
89 Writer(const Writer&) = delete;
90 Writer& operator=(const Writer&) = delete;
91
    /// \brief Open the writer.
    /// \param options Options for creating the writer (path, schema, io, ...).
    /// \return Status of the operation.
93 virtual Status Open(const WriterOptions& options) = 0;
94
    /// \brief Close the writer.
    /// \return Status of the operation.
96 virtual Status Close() = 0;
97
    /// \brief Write arrow data to the file.
    /// \param data Arrow C data interface array holding the data to write.
    /// NOTE(review): ownership/release semantics of `data` are not visible in
    /// this header -- confirm against the implementations.
    /// \return Status of the operation.
102 virtual Status Write(ArrowArray* data) = 0;
103
    /// \brief Get the file statistics.
    /// Only valid after the file is closed.
106 virtual Result<Metrics> metrics() = 0;
107
    /// \brief Get the file length.
    /// This can be called while the writer is still open or after the file is
    /// closed.
110 virtual Result<int64_t> length() = 0;
111
    /// \brief Returns a list of recommended split locations, if applicable,
    /// empty otherwise.
116 virtual std::vector<int64_t> split_offsets() = 0;
117};
118
/// \brief Factory function to create a writer of a specific file format.
///
/// Takes no arguments and yields a Result wrapping a std::unique_ptr<Writer>.
120using WriterFactory = std::function<Result<std::unique_ptr<Writer>>()>;
121
/// \brief Registry of writer factories for different file formats.
123struct ICEBERG_EXPORT WriterFactoryRegistry {
    /// \brief Construct from a file format type and a writer factory.
    /// NOTE(review): presumably this registers `factory` for `format_type` as a
    /// side effect of construction -- confirm in the implementation file.
125 WriterFactoryRegistry(FileFormatType format_type, WriterFactory factory);
126
    /// \brief Get the factory associated with a file format type.
    /// \return A mutable reference to the WriterFactory for `format_type`.
    /// NOTE(review): behaviour when no factory exists for `format_type` is not
    /// visible in this header.
128 static WriterFactory& GetFactory(FileFormatType format_type);
129
    /// \brief Obtain a writer for the given file format and options.
    /// NOTE(review): presumably creates the writer through the factory for
    /// `format_type` and then opens it with `options` -- confirm.
    /// \return A Result wrapping a std::unique_ptr<Writer>.
131 static Result<std::unique_ptr<Writer>> Open(FileFormatType format_type,
132 const WriterOptions& options);
133};
134
135} // namespace iceberg
Definition config.h:70
Definition config.h:67
Definition file_writer.h:39
Base writer class to write data from different file formats.
Definition file_writer.h:85
virtual Status Open(const WriterOptions &options)=0
Open the writer.
virtual Result< Metrics > metrics()=0
Get the file statistics. Only valid after the file is closed.
virtual Result< int64_t > length()=0
Get the file length. This can be called while the writer is still open or after the file is closed.
virtual std::vector< int64_t > split_offsets()=0
Returns a list of recommended split locations, if applicable, empty otherwise. When available,...
virtual Status Write(ArrowArray *data)=0
Write arrow data to the file.
virtual Status Close()=0
Close the writer.
std::function< Result< std::unique_ptr< Writer > >()> WriterFactory
Factory function to create a writer of a specific file format.
Definition file_writer.h:120
Definition arrow_c_data.h:57
Registry of writer factories for different file formats.
Definition file_writer.h:123
Options for creating a writer.
Definition file_writer.h:71
WriterProperties properties
Format-specific or implementation-specific properties.
Definition file_writer.h:81
std::string path
The path to the file to write.
Definition file_writer.h:73
std::shared_ptr< Schema > schema
The schema of the data to write.
Definition file_writer.h:75
std::unordered_map< std::string, std::string > metadata
Metadata to write to the file.
Definition file_writer.h:79
std::shared_ptr< class FileIO > io
FileIO instance to create the file.
Definition file_writer.h:77