use of com.pingcap.tikv.meta.TiDAGRequest in project plugins by qlangtech.
the class TiKVDataSourceDumper method startDump.
/**
 * Opens a TiKV snapshot for the configured table partition and exposes its content as a
 * row-by-row iterator.
 *
 * <p>The snapshot is read chunk by chunk (column vectors); each call to {@code next()} on the
 * returned iterator converts one row of the current chunk into a map keyed by
 * {@code ColumnMetaData#getKey()}. Columns whose value is NULL for that row are left out of the
 * map.
 *
 * <p>The returned iterator follows the {@link Iterator} contract: {@code hasNext()} is
 * idempotent (it never consumes a row), empty chunks are skipped, and {@code next()} throws
 * {@link java.util.NoSuchElementException} once all chunks are exhausted.
 *
 * @return iterator over the rows of this partition
 */
@Override
public Iterator<Map<String, Object>> startDump() {
    this.tiSession = dsFactory.getTiSession();
    TiDAGRequest dagRequest = dsFactory.getTiDAGRequest(this.targetCols, tiSession, tab.tableInfo);
    Snapshot snapshot = tiSession.createSnapshot(dagRequest.getStartTs());
    // What we get back are column vectors (chunks), not individual rows.
    // NOTE(review): 1024 is presumably the number of rows requested per chunk -- confirm
    // against Snapshot#tableReadChunk.
    Iterator<TiChunk> tiChunkIterator = snapshot.tableReadChunk(dagRequest, this.partition.tasks, 1024);
    return new Iterator<Map<String, Object>>() {
        /** Chunk currently being drained; null before the first chunk and after exhaustion. */
        private TiChunk currentChunk = null;
        /** Number of rows in {@link #currentChunk}. */
        private int numOfRows = 0;
        /** Index of the row that the next call to {@link #next()} will return. */
        private int rowIndex = 0;

        @Override
        public boolean hasNext() {
            // Loop (instead of a single fetch) so that chunks reporting zero rows are skipped.
            while (currentChunk == null || rowIndex >= numOfRows) {
                if (!tiChunkIterator.hasNext()) {
                    currentChunk = null;
                    return false;
                }
                currentChunk = tiChunkIterator.next();
                if (currentChunk == null) {
                    throw new IllegalStateException("next TiChunk can not be null");
                }
                rowIndex = 0;
                numOfRows = currentChunk.numOfRows();
            }
            return true;
        }

        @Override
        public Map<String, Object> next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("all TiChunks have been consumed");
            }
            // The cursor moves here, never in hasNext(), so repeated hasNext() calls are safe.
            final int current = rowIndex++;
            Map<String, Object> row = new HashMap<>();
            for (int i = 0; i < targetCols.size(); i++) {
                TiColumnVector column = currentChunk.column(i);
                if (column.isNullAt(current)) {
                    // NULL values are represented by the absence of the key.
                    continue;
                }
                MySQLType colType = column.dataType().getType();
                ColumnMetaData columnMetaData = targetCols.get(i);
                if (colType == MySQLType.TypeVarchar || colType == MySQLType.TypeString || colType == MySQLType.TypeBlob) {
                    row.put(columnMetaData.getKey(), filter(column.getUTF8String(current)));
                } else if (colType == MySQLType.TypeDate || colType == MySQLType.TypeNewDate) {
                    // FIXME date formatting: the value is an offset relative to 1970 and will
                    // probably have to be re-formatted depending on the consumer, see
                    // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-date
                    row.put(columnMetaData.getKey(), column.getLong(current));
                } else if (colType == MySQLType.TypeTimestamp || colType == MySQLType.TypeDatetime) {
                    row.put(columnMetaData.getKey(), column.getLong(current));
                } else {
                    row.put(columnMetaData.getKey(), column.getUTF8String(current));
                }
            }
            return row;
        }
    };
}
use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.
the class CoprocessorIterator method getTiChunkIterator.
/**
 * Creates a {@link CoprocessorIterator} that delivers the result of a DAG request as
 * {@link TiChunk} batches.
 *
 * <p>The incoming request is copied first; the copy is turned into the coprocessor request via
 * {@code buildTableScan()} and also supplies the schema, push-down type, store type and start
 * timestamp handed to the underlying {@code DAGIterator}.
 *
 * <p>NOTE(review): earlier documentation of this method stated that
 * {@link com.pingcap.tidb.tipb.IndexScan} is used for covering-index scans and
 * {@link com.pingcap.tidb.tipb.TableScan} otherwise -- confirm against {@code buildTableScan()}.
 *
 * <p>How a chunk is assembled depends on the iterator's encode type:
 *
 * <ul>
 *   <li>{@code TypeDefault}: up to {@code numOfRows} rows are read one by one and wrapped in
 *       row-backed column vectors.
 *   <li>{@code TypeChunk}: encoded column chunks are decoded and batched until at least
 *       {@code numOfRows} rows were collected or no data is left.
 *   <li>any other encode type: column count, row count, column names and type names are read
 *       from the data stream itself; the {@code numOfRows} argument is not used.
 * </ul>
 *
 * @param req TiDAGRequest built
 * @param regionTasks a list of RegionTask, each describing a task on a single region
 * @param session TiSession
 * @param numOfRows batch size used for the {@code TypeDefault} and {@code TypeChunk} encodings
 * @return an iterator producing one {@link TiChunk} per call to {@code next()}
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
  final TiDAGRequest copiedRequest = req.copy();
  return new DAGIterator<TiChunk>(
      copiedRequest.buildTableScan(),
      regionTasks,
      session,
      SchemaInfer.create(copiedRequest),
      copiedRequest.getPushDownType(),
      copiedRequest.getStoreType(),
      copiedRequest.getStartTs().getVersion()) {
    @Override
    public TiChunk next() {
      final DataType[] columnTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
      final int columnTotal = columnTypes.length;
      // TODO tiColumnarBatch is meant to be reused in the entire data loading process.

      if (this.encodeType == EncodeType.TypeDefault) {
        // Row encoding: buffer rows until the batch is full or the source runs dry.
        final Row[] buffer = new Row[numOfRows];
        int filled = 0;
        while (filled < buffer.length && hasNext()) {
          buffer[filled] = rowReader.readRow(columnTypes);
          filled++;
        }
        final TiRowColumnVector[] rowVectors = new TiRowColumnVector[columnTotal];
        for (int col = 0; col < columnTotal; col++) {
          rowVectors[col] = new TiRowColumnVector(columnTypes[col], col, buffer, filled);
        }
        return new TiChunk(rowVectors);
      }

      if (this.encodeType == EncodeType.TypeChunk) {
        // Chunk encoding: gather the decoded pieces of every column, then batch them.
        final List<List<TiChunkColumnVector>> piecesPerColumn = new ArrayList<>();
        for (int col = 0; col < columnTotal; col++) {
          piecesPerColumn.add(new ArrayList<>());
        }
        int collectedRows = 0;
        // TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
        while (collectedRows < numOfRows && hasNext()) {
          for (int col = 0; col < columnTotal; col++) {
            piecesPerColumn.get(col).add(columnTypes[col].decodeChunkColumn(dataInput));
          }
          // The piece just appended to the first column tells how many rows were decoded.
          final List<TiChunkColumnVector> firstColumnPieces = piecesPerColumn.get(0);
          final TiChunkColumnVector latestPiece = firstColumnPieces.get(firstColumnPieces.size() - 1);
          collectedRows += latestPiece.numOfRows();
          // left data should be trashed.
          dataInput = new CodecDataInput(new byte[0]);
        }
        final TiColumnVector[] batchedVectors = new TiColumnVector[columnTotal];
        for (int col = 0; col < columnTotal; col++) {
          batchedVectors[col] = new BatchedTiChunkColumnVector(piecesPerColumn.get(col), collectedRows);
        }
        return new TiChunk(batchedVectors);
      }

      // Remaining encodings are self-describing: column count first, then row count.
      final long declaredColumns = IntegerCodec.readUVarLong(dataInput);
      final long declaredRows = IntegerCodec.readUVarLong(dataInput);
      final TiColumnVector[] decodedVectors = new TiColumnVector[(int) declaredColumns];
      for (int col = 0; col < declaredColumns; col++) {
        // The column name is not needed: consume and discard its bytes.
        final long nameLength = IntegerCodec.readUVarLong(dataInput);
        for (int i = 0; i < nameLength; i++) {
          dataInput.readByte();
        }
        // The type name decides which decoder handles the column payload.
        final long typeNameLength = IntegerCodec.readUVarLong(dataInput);
        final byte[] typeNameBytes = new byte[(int) typeNameLength];
        for (int i = 0; i < typeNameLength; i++) {
          typeNameBytes[i] = dataInput.readByte();
        }
        final CHType columnType = CHTypeMapping.parseType(new String(typeNameBytes, StandardCharsets.UTF_8));
        decodedVectors[col] = columnType.decode(dataInput, (int) declaredRows);
        // TODO this is a workaround to bypass nullable type
      }
      dataInput = new CodecDataInput(new byte[0]);
      return new TiChunk(decodedVectors);
    }
  };
}
use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.
the class SchemaInferTest method simpleSelectSchemaInferTest.
/**
 * Checks schema inference for a plain single-column projection, i.e. the request equivalent of
 * {@code select name from t1}: exactly one type must be inferred and it must be of the same
 * class as {@code StringType.VARCHAR}.
 */
@Test
public void simpleSelectSchemaInferTest() {
  final TiDAGRequest selectName = new TiDAGRequest(TiDAGRequest.PushDownType.NORMAL);
  selectName.addRequiredColumn(name);
  selectName.setTableInfo(table);
  selectName.setStartTs(ts);

  final List<DataType> inferredTypes = SchemaInfer.create(selectName).getTypes();

  assertEquals(1, inferredTypes.size());
  final DataType onlyType = inferredTypes.get(0);
  assertEquals(StringType.VARCHAR.getClass(), onlyType.getClass());
}
use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.
the class SchemaInferTest method makeSelectDAGReq.
/**
 * Builds one aggregate request per supplied group-by item, each shaped like
 * {@code select sum(number) from t1 group by <item>}.
 *
 * @param byItems group-by items; every item yields its own request
 * @return the requests, in the same order as {@code byItems}
 */
private List<TiDAGRequest> makeSelectDAGReq(ByItem... byItems) {
  final List<TiDAGRequest> requests = new ArrayList<>();
  for (int idx = 0; idx < byItems.length; idx++) {
    final ByItem groupBy = byItems[idx];
    final TiDAGRequest request = new TiDAGRequest(TiDAGRequest.PushDownType.NORMAL);
    request.setTableInfo(table);
    request.addRequiredColumn(name);
    request.addRequiredColumn(number);
    request.addAggregate(sum);
    request.getGroupByItems().add(groupBy);
    request.setStartTs(ts);
    requests.add(request);
  }
  return requests;
}
use of com.pingcap.tikv.meta.TiDAGRequest in project plugins by qlangtech.
the class TiKVDataSourceFactory method getDataDumpers.
/**
 * Splits the given table into partitions and returns one lazily created dumper per partition.
 *
 * <p>The table is looked up in TiDB, a DAG request is built for the reflected columns, and for
 * every pruned physical id of that request the partitions are computed through
 * {@code createPartitions}. Dumpers are instantiated on demand while iterating.
 *
 * @param table table to dump; its reflect columns must not be empty
 * @param regionId passed through unchanged to {@code createPartitions}
 * @return the number of partitions together with an iterator creating one dumper per partition
 * @throws IllegalStateException if the table has no reflect columns
 * @throws NullPointerException if the table can not be found in TiDB
 */
public DataDumpers getDataDumpers(TISTable table, Optional<Long> regionId) {
    // target cols
    final List<ColumnMetaData> reflectCols = table.getReflectCols();
    if (CollectionUtils.isEmpty(reflectCols)) {
        throw new IllegalStateException("param reflectCols can not be null");
    }
    // The lambda below can only return the partitions, so the table info is handed out
    // through this reference.
    final AtomicReference<TiTableInfoWrapper> tabRef = new AtomicReference<>();
    final List<TiPartition> parts = this.openTiDB((session, c, db) -> {
        TiTableInfo tiTable = c.getTable(db, table.getTableName());
        Objects.requireNonNull(tiTable, "table:" + table.getTableName() + " can not find relevant table in TiDB");
        tabRef.set(new TiTableInfoWrapper(tiTable));
        TiDAGRequest dagRequest = getTiDAGRequest(reflectCols, session, tiTable);
        List<Long> prunedPhysicalIds = dagRequest.getPrunedPhysicalIds();
        return prunedPhysicalIds.stream().flatMap((prunedPhysicalId) -> createPartitions(prunedPhysicalId, session, dagRequest.copyReqWithPhysicalId(prunedPhysicalId), regionId).stream()).collect(Collectors.toList());
    });
    final int splitCount = parts.size();
    final TiTableInfoWrapper tableInfo = Objects.requireNonNull(tabRef.get(), "instacne of TiTableInfo can not be null");
    Iterator<IDataSourceDumper> dumpers = new Iterator<IDataSourceDumper>() {
        /** Position of the partition the next dumper will be created for. */
        private int index = 0;

        @Override
        public boolean hasNext() {
            return index < splitCount;
        }

        @Override
        public IDataSourceDumper next() {
            if (!hasNext()) {
                // Honor the Iterator contract instead of leaking an IndexOutOfBoundsException.
                throw new java.util.NoSuchElementException("all " + splitCount + " dumpers have been created");
            }
            return new TiKVDataSourceDumper(TiKVDataSourceFactory.this, parts.get(index++), tableInfo, reflectCols);
        }
    };
    return new DataDumpers(splitCount, dumpers);
}
Aggregations