31 #include "iceberg/result.h"
33 #include "iceberg/util/config.h"
54 virtual Result<std::optional<ArrowArray>>
Next() = 0;
57 virtual Result<ArrowSchema>
Schema() = 0;
60 virtual Result<std::unordered_map<std::string, std::string>>
Metadata() = 0;
77 inline static Entry<int64_t>
kBatchSize{
"read.batch-size", 4096};
83 inline static Entry<int64_t>
kAvroBufferSize{
"read.avro.buffer-size", 1024 * 1024};
87 const std::unordered_map<std::string, std::string>& properties);
99 std::shared_ptr<class FileIO>
io;
104 std::shared_ptr<class Expression>
filter;
121 static ReaderFactory& GetFactory(FileFormatType format_type);
124 static Result<std::unique_ptr<Reader>> Open(FileFormatType format_type,
Definition file_reader.h:71
static ReaderProperties FromMap(const std::unordered_map< std::string, std::string > &properties)
Create a ReaderProperties instance from a map of key-value pairs.
Definition file_reader.cc:62
static Entry< int64_t > kAvroBufferSize
The buffer size used by Avro input stream.
Definition file_reader.h:83
static Entry< int64_t > kBatchSize
The batch size to read.
Definition file_reader.h:77
static Entry< bool > kAvroSkipDatum
Skip GenericDatum in Avro reader for better performance. When true, decode directly from Avro to Arrow...
Definition file_reader.h:81
Base reader class to read data from different file formats.
Definition file_reader.h:38
virtual Result< std::optional< ArrowArray > > Next()=0
Read next data from file.
virtual Result< ArrowSchema > Schema()=0
Get the schema of the data.
virtual Result< std::unordered_map< std::string, std::string > > Metadata()=0
Get the metadata of the file.
virtual Status Open(const ReaderOptions &options)=0
Open the reader.
virtual Status Close()=0
Close the reader.
std::function< Result< std::unique_ptr< Reader > >()> ReaderFactory
Factory function to create a reader of a specific file format.
Definition file_reader.h:113
Registry of reader factories for different file formats.
Definition file_reader.h:116
Options for creating a reader.
Definition file_reader.h:91
std::shared_ptr< class Schema > projection
The projection schema to read from the file. This field is required.
Definition file_reader.h:101
std::shared_ptr< class FileIO > io
FileIO instance to open the file.
Definition file_reader.h:99
std::optional< size_t > length
The total length of the file.
Definition file_reader.h:95
std::string path
The path to the file to read.
Definition file_reader.h:93
std::shared_ptr< class NameMapping > name_mapping
Name mapping for schema evolution compatibility. Used when reading files that may have different field...
Definition file_reader.h:107
ReaderProperties properties
Format-specific or implementation-specific properties.
Definition file_reader.h:109
std::optional< Split > split
The split to read.
Definition file_reader.h:97
std::shared_ptr< class Expression > filter
The filter to apply to the data. Reader implementations may ignore this if the file format does not support...
Definition file_reader.h:104
A split of the file to read.
Definition file_reader.h:64
size_t offset
The offset of the split.
Definition file_reader.h:66
size_t length
The length of the split.
Definition file_reader.h:68