Use of com.pingcap.tikv.columnar.TiChunk in project plugins by qlangtech.
Class TiKVDataSourceDumper, method startDump:
@Override
public Iterator<Map<String, Object>> startDump() {
    this.tiSession = dsFactory.getTiSession();
    // Catalog cat = this.tiSession.getCatalog();
    // TiDBInfo db = cat.getDatabase(dbName);
    // TiTableInfo tiTable = cat.getTable(db, table.getTableName());
    TiDAGRequest dagRequest = dsFactory.getTiDAGRequest(this.targetCols, tiSession, tab.tableInfo);
    Snapshot snapshot = tiSession.createSnapshot(dagRequest.getStartTs());
    // The chunked read returns column vectors, one TiChunk per batch of up to 1024 rows.
    Iterator<TiChunk> tiChunkIterator = snapshot.tableReadChunk(dagRequest, this.partition.tasks, 1024);
    return new Iterator<Map<String, Object>>() {

        TiChunk next = null;
        int numOfRows = -1;
        int rowIndex = -1;
        TiColumnVector column = null;
        ColumnMetaData columnMetaData;

        @Override
        public boolean hasNext() {
            // Advance within the current chunk first. Note that this increments
            // rowIndex, so the iterator assumes hasNext() is called exactly once
            // before each next().
            if (next != null) {
                if (rowIndex++ < (numOfRows - 1)) {
                    return true;
                }
                // The current chunk is exhausted; reset and fetch the next one.
                next = null;
                numOfRows = -1;
                rowIndex = -1;
            }
            boolean hasNext = tiChunkIterator.hasNext();
            if (hasNext) {
                next = tiChunkIterator.next();
                if (next == null) {
                    throw new IllegalStateException("next TiChunk can not be null");
                }
                rowIndex = 0;
                numOfRows = next.numOfRows();
            }
            return hasNext;
        }

        @Override
        public Map<String, Object> next() {
            // Materialize one row by reading rowIndex from each column vector;
            // null cells are skipped and are simply absent from the map.
            Map<String, Object> row = new HashMap<>();
            MySQLType colType = null;
            for (int i = 0; i < targetCols.size(); i++) {
                column = next.column(i);
                if (column.isNullAt(rowIndex)) {
                    continue;
                }
                colType = column.dataType().getType();
                columnMetaData = targetCols.get(i);
                if (colType == MySQLType.TypeVarchar || colType == MySQLType.TypeString || colType == MySQLType.TypeBlob) {
                    row.put(columnMetaData.getKey(), filter(column.getUTF8String(rowIndex)));
                } else if (colType == MySQLType.TypeDate || colType == MySQLType.TypeNewDate) {
                    // FIXME date formatting: the value is an offset from the 1970 epoch and
                    // will likely need to be re-formatted to match the target schema, see
                    // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-date
                    row.put(columnMetaData.getKey(), column.getLong(rowIndex));
                } else if (colType == MySQLType.TypeTimestamp || colType == MySQLType.TypeDatetime) {
                    row.put(columnMetaData.getKey(), column.getLong(rowIndex));
                } else {
                    row.put(columnMetaData.getKey(), column.getUTF8String(rowIndex));
                }
            }
            return row;
        }
    };
}
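A minimal sketch of how the iterator returned by startDump() might be consumed; the method name printAll and the already-configured dumper instance are hypothetical, not part of the original source:

// A hedged usage sketch; "dumper" is assumed to be a fully configured
// TiKVDataSourceDumper instance.
void printAll(TiKVDataSourceDumper dumper) {
    Iterator<Map<String, Object>> rows = dumper.startDump();
    // The implementation above advances its row index inside hasNext(),
    // so call hasNext() exactly once before each next().
    while (rows.hasNext()) {
        Map<String, Object> row = rows.next();
        // Null cells are skipped during materialization, so an absent key means NULL.
        System.out.println(row);
    }
}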
Use of com.pingcap.tikv.columnar.TiChunk in project tispark by pingcap.
Class CoprocessorIterator, method getTiChunkIterator:
/**
 * Build a DAGIterator from TiDAGRequest and region tasks to get rows
 *
 * <p>When we are performing a scan request using coveringIndex, {@link
 * com.pingcap.tidb.tipb.IndexScan} should be used to read index rows. In other circumstances,
 * {@link com.pingcap.tidb.tipb.TableScan} is used to scan table rows.
 *
 * @param req the TiDAGRequest to build the iterator from
 * @param regionTasks a list of RegionTask, each containing a task on a single region
 * @param session TiSession
 * @param numOfRows the maximum number of rows batched into each returned TiChunk
 * @return a DAGIterator to be processed
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(
        TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
    TiDAGRequest dagRequest = req.copy();
    return new DAGIterator<TiChunk>(
            dagRequest.buildTableScan(),
            regionTasks,
            session,
            SchemaInfer.create(dagRequest),
            dagRequest.getPushDownType(),
            dagRequest.getStoreType(),
            dagRequest.getStartTs().getVersion()) {

        @Override
        public TiChunk next() {
            DataType[] dataTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
            // TODO tiColumnarBatch is meant to be reused in the entire data loading process.
            if (this.encodeType == EncodeType.TypeDefault) {
                // Default encoding: decode up to numOfRows rows one by one, then wrap
                // them in row-backed column vectors.
                Row[] rows = new Row[numOfRows];
                int count = 0;
                for (int i = 0; i < rows.length && hasNext(); i++) {
                    rows[i] = rowReader.readRow(dataTypes);
                    count += 1;
                }
                TiRowColumnVector[] columnarVectors = new TiRowColumnVector[dataTypes.length];
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new TiRowColumnVector(dataTypes[i], i, rows, count);
                }
                return new TiChunk(columnarVectors);
            } else if (this.encodeType == EncodeType.TypeChunk) {
                // Chunk encoding: decode whole column chunks and batch them per column.
                TiColumnVector[] columnarVectors = new TiColumnVector[dataTypes.length];
                List<List<TiChunkColumnVector>> childColumnVectors = new ArrayList<>();
                for (int i = 0; i < dataTypes.length; i++) {
                    childColumnVectors.add(new ArrayList<>());
                }
                int count = 0;
                // TODO(Zhexuan Yang) we need to control the memory limit to avoid out-of-memory errors.
                while (count < numOfRows && hasNext()) {
                    for (int i = 0; i < dataTypes.length; i++) {
                        childColumnVectors.get(i).add(dataTypes[i].decodeChunkColumn(dataInput));
                    }
                    int size = childColumnVectors.get(0).size();
                    count += childColumnVectors.get(0).get(size - 1).numOfRows();
                    // Leftover data should be discarded.
                    dataInput = new CodecDataInput(new byte[0]);
                }
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new BatchedTiChunkColumnVector(childColumnVectors.get(i), count);
                }
                return new TiChunk(columnarVectors);
            } else {
                // ClickHouse-style block encoding: read the column count, then for each
                // column skip its name, parse its type name, and decode the values.
                long colCount = IntegerCodec.readUVarLong(dataInput);
                long numOfRows = IntegerCodec.readUVarLong(dataInput);
                TiColumnVector[] columnVectors = new TiColumnVector[(int) colCount];
                for (int columnIdx = 0; columnIdx < colCount; columnIdx++) {
                    // Read and skip the column name.
                    long length = IntegerCodec.readUVarLong(dataInput);
                    for (int i = 0; i < length; i++) {
                        dataInput.readByte();
                    }
                    // Read the type name.
                    length = IntegerCodec.readUVarLong(dataInput);
                    byte[] utf8Bytes = new byte[(int) length];
                    for (int i = 0; i < length; i++) {
                        utf8Bytes[i] = dataInput.readByte();
                    }
                    String typeName = new String(utf8Bytes, StandardCharsets.UTF_8);
                    CHType type = CHTypeMapping.parseType(typeName);
                    columnVectors[columnIdx] = type.decode(dataInput, (int) numOfRows);
                    // TODO this is a workaround to bypass nullable types
                }
                dataInput = new CodecDataInput(new byte[0]);
                return new TiChunk(columnVectors);
            }
        }
    };
}
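A hedged sketch of driving getTiChunkIterator directly; the dagRequest, regionTasks, and session arguments are assumed to be prepared as in the TiKVDataSourceDumper example above, and the batch size of 1024 is an arbitrary choice. All accessors used here (column, numOfRows, isNullAt) appear in the snippets above:

// Hypothetical driver: iterate chunk by chunk and scan the first column vector.
void scanFirstColumn(TiDAGRequest dagRequest, List<RegionTask> regionTasks, TiSession session) {
    CoprocessorIterator<TiChunk> it =
            CoprocessorIterator.getTiChunkIterator(dagRequest, regionTasks, session, 1024);
    while (it.hasNext()) {
        TiChunk chunk = it.next();
        TiColumnVector first = chunk.column(0);
        for (int r = 0; r < chunk.numOfRows(); r++) {
            if (!first.isNullAt(r)) {
                // Read the value with the accessor matching the column type,
                // e.g. first.getLong(r) or first.getUTF8String(r).
            }
        }
    }
}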