forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathIInputFormat.h
More file actions
90 lines (61 loc) · 3.04 KB
/
IInputFormat.h
File metadata and controls
90 lines (61 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#pragma once
#include <Formats/ColumnMapping.h>
#include <IO/ReadBuffer.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/InputFormatErrorsLogger.h>
#include <Processors/SourceWithKeyCondition.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Core/BlockMissingValues.h>
namespace DB
{
/// Forward declaration only; nothing in the visible part of this header needs the complete type.
struct SelectQueryInfo;
/// Shared pointer to a ColumnMapping. IInputFormat stores one and exposes it via
/// getColumnMapping() / setColumnMapping().
using ColumnMappingPtr = std::shared_ptr<ColumnMapping>;
/** Input format is a source that reads data from a ReadBuffer.
  *
  * Derived classes implement read(); the remaining virtual hooks have default
  * implementations (mostly no-ops) that concrete formats may override.
  */
class IInputFormat : public SourceWithKeyCondition
{
protected:
    /// Raw pointer to the buffer currently being read. It is nullptr when no buffer
    /// is attached (see the constructor and resetReadBuffer()). Nothing in this header
    /// deletes it; buffers whose lifetime must be tied to the format go through addBuffer().
    ReadBuffer * in [[maybe_unused]] = nullptr;

public:
    /// ReadBuffer can be nullptr for random-access formats.
    IInputFormat(Block header, ReadBuffer * in_);

    /// Overrides the base-class chunk producer. Defined out of line.
    Chunk generate() override;

    /// All data reading from the read buffer must be performed by this method.
    virtual Chunk read() = 0;

    /** In some use cases (hello Kafka) we need to read a lot of tiny streams in exactly the same format.
      * Recreating the parser for each small stream takes too long, so we introduce a method
      * resetParser() which allows resetting the state of the parser to continue reading the
      * source stream without recreating it.
      * It should be called after the current buffer has been fully read.
      */
    virtual void resetParser();

    /// Defined out of line; presumably makes `in_` the buffer to read from - confirm in the .cpp.
    virtual void setReadBuffer(ReadBuffer & in_);

    /// Detaches the current read buffer by setting `in` to nullptr.
    /// Buffers stored through addBuffer() are not touched.
    virtual void resetReadBuffer() { in = nullptr; }

    /// The default implementation returns nullptr; formats may override it.
    virtual const BlockMissingValues * getMissingValues() const { return nullptr; }

    /// Must be called from ParallelParsingInputFormat after readSuffix
    ColumnMappingPtr getColumnMapping() const { return column_mapping; }

    /// Must be called from ParallelParsingInputFormat before readPrefix
    /// The argument is taken by value and moved into the member, so storing it
    /// does not cost an additional shared_ptr reference-count increment/decrement.
    void setColumnMapping(ColumnMappingPtr column_mapping_) { column_mapping = std::move(column_mapping_); }

    /// Set the number of rows that was already read in
    /// parallel parsing before creating this parser.
    /// The default implementation does nothing.
    virtual void setRowsReadBefore(size_t /*rows*/) {}

    /// Sets the serialization hints for the columns. It allows to create columns
    /// in custom serializations (e.g. Sparse) for parsing and avoid extra conversion.
    /// The default implementation does nothing.
    virtual void setSerializationHints(const SerializationInfoByName & /*hints*/) {}

    /// Takes ownership of `buffer` and appends it to the list of buffers held by this format.
    void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }

    /// Stores the errors logger in `errors_logger`.
    void setErrorsLogger(const InputFormatErrorsLoggerPtr & errors_logger_) { errors_logger = errors_logger_; }

    /// The default implementation returns 0; formats may override it.
    virtual size_t getApproxBytesReadForChunk() const { return 0; }

    /// Sets the `need_only_count` flag.
    void needOnlyCount() { need_only_count = true; }

    /// Set additional info/key/id related to underlying storage of the ReadBuffer
    /// The default implementation does nothing.
    virtual void setStorageRelatedUniqueKey(const Settings & /*settings*/, const String & /*key*/) {}

protected:
    /// Returns the attached read buffer. `chassert` checks that `in` is set before dereferencing.
    ReadBuffer & getReadBuffer() const { chassert(in); return *in; }

    /// Defined out of line; presumably builds the chunk returned when only a row count
    /// is needed (see `need_only_count`) - confirm in the .cpp.
    virtual Chunk getChunkForCount(size_t rows);

    /// Column mapping accessed through getColumnMapping() / setColumnMapping(); empty by default.
    ColumnMappingPtr column_mapping{};

    /// Errors logger installed by setErrorsLogger().
    InputFormatErrorsLoggerPtr errors_logger;

    /// Set to true by needOnlyCount().
    bool need_only_count = false;

private:
    /// Buffers handed over through addBuffer(); they are owned by this object.
    std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
};
/// Shared-ownership pointer to an IInputFormat.
using InputFormatPtr = std::shared_ptr<IInputFormat>;
}