Use of com.pingcap.tikv.columnar.BatchedTiChunkColumnVector in project tispark by pingcap.
The snippet below is from the class CoprocessorIterator, method getTiChunkIterator.
/**
* Build a DAGIterator from a TiDAGRequest and region tasks to get rows
*
* <p>When we are performing a scan request using a covering index, {@link
* com.pingcap.tidb.tipb.IndexScan} should be used to read index rows. In other circumstances,
* {@link com.pingcap.tidb.tipb.TableScan} is used to scan table rows.
*
* @param req the TiDAGRequest to execute
* @param regionTasks a list of RegionTask, each containing a task on a single region
* @param session the TiSession
* @param numOfRows the maximum number of rows to read into a single TiChunk
* @return a DAGIterator to be processed
*/
public static CoprocessorIterator<TiChunk> getTiChunkIterator(
    TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
TiDAGRequest dagRequest = req.copy();
return new DAGIterator<TiChunk>(
    dagRequest.buildTableScan(),
    regionTasks,
    session,
    SchemaInfer.create(dagRequest),
    dagRequest.getPushDownType(),
    dagRequest.getStoreType(),
    dagRequest.getStartTs().getVersion()) {
@Override
public TiChunk next() {
DataType[] dataTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
// TODO tiColumnarBatch is meant to be reused in the entire data loading process.
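// Dispatch on the response encoding: TypeDefault is decoded row by row,
// TypeChunk is decoded column by column, and the remaining branch decodes
// CH-typed blocks.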
if (this.encodeType == EncodeType.TypeDefault) {
Row[] rows = new Row[numOfRows];
int count = 0;
for (int i = 0; i < rows.length && hasNext(); i++) {
rows[i] = rowReader.readRow(dataTypes);
count += 1;
}
TiRowColumnVector[] columnarVectors = new TiRowColumnVector[dataTypes.length];
for (int i = 0; i < dataTypes.length; i++) {
columnarVectors[i] = new TiRowColumnVector(dataTypes[i], i, rows, count);
}
return new TiChunk(columnarVectors);
} else if (this.encodeType == EncodeType.TypeChunk) {
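// Column-wise path: decode one TiChunkColumnVector per column from each
// response, then batch the accumulated pieces into BatchedTiChunkColumnVector.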
TiColumnVector[] columnarVectors = new TiColumnVector[dataTypes.length];
List<List<TiChunkColumnVector>> childColumnVectors = new ArrayList<>();
for (int i = 0; i < dataTypes.length; i++) {
childColumnVectors.add(new ArrayList<>());
}
int count = 0;
// TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
while (count < numOfRows && hasNext()) {
for (int i = 0; i < dataTypes.length; i++) {
childColumnVectors.get(i).add(dataTypes[i].decodeChunkColumn(dataInput));
}
int size = childColumnVectors.get(0).size();
count += childColumnVectors.get(0).get(size - 1).numOfRows();
// leftover data in this response should be discarded
dataInput = new CodecDataInput(new byte[0]);
}
for (int i = 0; i < dataTypes.length; i++) {
columnarVectors[i] = new BatchedTiChunkColumnVector(childColumnVectors.get(i), count);
}
return new TiChunk(columnarVectors);
} else {
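// Remaining branch: decode a serialized block column by column via the CH
// type mapping (presumably the TiFlash/CHBlock encoding, judging by the
// CHType classes). Note that the local numOfRows read below shadows the
// method parameter of the same name.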
// reading column count
long colCount = IntegerCodec.readUVarLong(dataInput);
long numOfRows = IntegerCodec.readUVarLong(dataInput);
TiColumnVector[] columnVectors = new TiColumnVector[(int) colCount];
for (int columnIdx = 0; columnIdx < colCount; columnIdx++) {
// read and skip the column name (consumed but unused here)
long length = IntegerCodec.readUVarLong(dataInput);
for (int i = 0; i < length; i++) {
dataInput.readByte();
}
// reading type name
length = IntegerCodec.readUVarLong(dataInput);
byte[] utf8Bytes = new byte[(int) length];
for (int i = 0; i < length; i++) {
utf8Bytes[i] = dataInput.readByte();
}
String typeName = new String(utf8Bytes, StandardCharsets.UTF_8);
CHType type = CHTypeMapping.parseType(typeName);
columnVectors[columnIdx] = type.decode(dataInput, (int) numOfRows);
// TODO this is a workaround to bypass the nullable type
}
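// discard any remaining bytes, mirroring the TypeChunk branch above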
dataInput = new CodecDataInput(new byte[0]);
return new TiChunk(columnVectors);
}
}
};
}
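For context, a call site might look like the sketch below. The variables dagRequest, tasks, and session are hypothetical stand-ins for a built TiDAGRequest, its region tasks, and an open TiSession; the batch size of 1024 is arbitrary.

// Hypothetical call site: iterate coprocessor results as columnar TiChunk batches.
CoprocessorIterator<TiChunk> iter =
    CoprocessorIterator.getTiChunkIterator(dagRequest, tasks, session, 1024);
while (iter.hasNext()) {
  TiChunk chunk = iter.next(); // up to 1024 rows, one column vector per output column
  // consume the chunk's column vectors here
}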