61 static Result<std::unique_ptr<ManifestGroup>> Make(
62 std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
63 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_,
64 std::vector<ManifestFile> manifests);
73 static Result<std::unique_ptr<ManifestGroup>> Make(
74 std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
75 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
76 std::vector<ManifestFile> data_manifests,
77 std::vector<ManifestFile> delete_manifests);
87 ManifestGroup& FilterData(std::shared_ptr<Expression> filter);
90 ManifestGroup& FilterFiles(std::shared_ptr<Expression> filter);
93 ManifestGroup& FilterPartitions(std::shared_ptr<Expression> filter);
121 ManifestGroup& ColumnsToKeepStats(std::unordered_set<int32_t> column_ids);
124 Result<std::vector<std::shared_ptr<FileScanTask>>> PlanFiles();
127 Result<std::vector<ManifestEntry>> Entries();
129 using CreateTasksFunction =
130 std::function<Result<std::vector<std::shared_ptr<ScanTask>>>(
131 std::vector<ManifestEntry>&&,
const TaskContext&)>;
137 Result<std::vector<std::shared_ptr<ScanTask>>> Plan(
138 const CreateTasksFunction& create_tasks);
141 ManifestGroup(std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
142 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
143 std::vector<ManifestFile> data_manifests,
144 DeleteFileIndex::Builder&& delete_index_builder);
146 Result<std::unordered_map<int32_t, std::vector<ManifestEntry>>> ReadEntries();
148 Result<std::unique_ptr<ManifestReader>> MakeReader(
const ManifestFile& manifest);
150 std::shared_ptr<FileIO> io_;
151 std::shared_ptr<Schema> schema_;
152 std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
153 std::vector<ManifestFile> data_manifests_;
154 DeleteFileIndex::Builder delete_index_builder_;
155 std::shared_ptr<Expression> data_filter_;
156 std::shared_ptr<Expression> file_filter_;
157 std::shared_ptr<Expression> partition_filter_;
158 std::function<bool(
const ManifestEntry&)> manifest_entry_predicate_;
159 std::vector<std::string> columns_;
160 std::unordered_set<int32_t> columns_to_keep_stats_;
161 bool case_sensitive_ =
true;
162 bool ignore_deleted_ =
false;
163 bool ignore_existing_ =
false;
164 bool ignore_residuals_ =
false;
A manifest is an immutable Avro file that lists data files or delete files, along with each file's partition data tuple, metrics, and tracking information.
Definition manifest_entry.h:307
Context passed to task creation functions.
Definition manifest_group.h:43