Search in sources :

Example 1 with TiChunk

use of com.pingcap.tikv.columnar.TiChunk in project plugins by qlangtech.

the class TiKVDataSourceDumper method startDump.

/**
 * Starts dumping the table partition and exposes the result as a row iterator.
 *
 * <p>Each returned map holds one row, keyed by {@code ColumnMetaData.getKey()} of the
 * corresponding entry in {@code targetCols}. Columns whose value is null for a row are
 * omitted from that row's map. Varchar/string/blob values are read as UTF-8 strings and
 * passed through {@code filter(...)}; date, datetime and timestamp values are stored as the
 * raw {@code long} returned by the column vector; every other type is stored as its UTF-8
 * string form.
 *
 * <p>The returned iterator follows the {@link Iterator} contract: {@code hasNext()} may be
 * called any number of times without consuming a row, and {@code next()} throws
 * {@link java.util.NoSuchElementException} once all chunks are exhausted.
 *
 * @return an iterator over the rows of this partition
 * @throws IllegalStateException (from the iterator) if the underlying chunk iterator yields
 *     a null chunk
 */
@Override
public Iterator<Map<String, Object>> startDump() {
    this.tiSession = dsFactory.getTiSession();
    TiDAGRequest dagRequest = dsFactory.getTiDAGRequest(this.targetCols, tiSession, tab.tableInfo);
    Snapshot snapshot = tiSession.createSnapshot(dagRequest.getStartTs());
    // The read yields column vectors grouped into chunks.
    // NOTE(review): 1024 is presumably the number of rows requested per chunk - confirm.
    Iterator<TiChunk> tiChunkIterator = snapshot.tableReadChunk(dagRequest, this.partition.tasks, 1024);
    return new Iterator<Map<String, Object>>() {

        /** Chunk currently being walked; null before the first fetch and after exhaustion. */
        private TiChunk chunk = null;

        /** Number of rows in {@link #chunk}. */
        private int numOfRows = 0;

        /** Index of the next row of {@link #chunk} that {@link #next()} will return. */
        private int rowIndex = 0;

        @Override
        public boolean hasNext() {
            // Fetch chunks until one has an unread row. The cursor is only advanced by
            // next(), so repeated hasNext() calls never skip a row, and chunks that
            // report zero rows are skipped instead of being read at row 0.
            while (chunk == null || rowIndex >= numOfRows) {
                if (!tiChunkIterator.hasNext()) {
                    chunk = null;
                    return false;
                }
                chunk = tiChunkIterator.next();
                if (chunk == null) {
                    throw new IllegalStateException("next TiChunk can not be null");
                }
                rowIndex = 0;
                numOfRows = chunk.numOfRows();
            }
            return true;
        }

        @Override
        public Map<String, Object> next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException("no more rows to dump");
            }
            final int currentRow = rowIndex++;
            Map<String, Object> row = new HashMap<>();
            for (int i = 0; i < targetCols.size(); i++) {
                TiColumnVector column = chunk.column(i);
                if (column.isNullAt(currentRow)) {
                    // Null values are left out of the row map entirely.
                    continue;
                }
                MySQLType colType = column.dataType().getType();
                ColumnMetaData columnMetaData = targetCols.get(i);
                if (colType == MySQLType.TypeVarchar || colType == MySQLType.TypeString || colType == MySQLType.TypeBlob) {
                    row.put(columnMetaData.getKey(), filter(column.getUTF8String(currentRow)));
                } else if (colType == MySQLType.TypeDate || colType == MySQLType.TypeNewDate) {
                    // FIXME date formatting: the value is an offset relative to 1970 and will
                    // probably need to be re-formatted depending on how it is consumed.
                    // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-date
                    row.put(columnMetaData.getKey(), column.getLong(currentRow));
                } else if (colType == MySQLType.TypeTimestamp || colType == MySQLType.TypeDatetime) {
                    row.put(columnMetaData.getKey(), column.getLong(currentRow));
                } else {
                    row.put(columnMetaData.getKey(), column.getUTF8String(currentRow));
                }
            }
            return row;
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) MySQLType(com.pingcap.tikv.types.MySQLType) HashMap(java.util.HashMap) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) Snapshot(com.pingcap.tikv.Snapshot) TiColumnVector(com.pingcap.tikv.columnar.TiColumnVector) Iterator(java.util.Iterator) ColumnMetaData(com.qlangtech.tis.plugin.ds.ColumnMetaData)

Example 2 with TiChunk

use of com.pingcap.tikv.columnar.TiChunk in project tispark by pingcap.

the class CoprocessorIterator method getTiChunkIterator.

/**
 * Build a DAGIterator from TiDAGRequest and region tasks to get rows, batched into
 * {@link TiChunk}s.
 *
 * <p>When we are performing a scan request using coveringIndex, {@link
 * com.pingcap.tidb.tipb.IndexScan} should be used to read index rows. In other circumstances,
 * {@link com.pingcap.tidb.tipb.TableScan} is used to scan table rows.
 *
 * <p>How a chunk is assembled depends on the iterator's encode type:
 *
 * <ul>
 *   <li>{@code TypeDefault}: up to {@code numOfRows} rows are read one by one and wrapped in
 *       row-backed column vectors.
 *   <li>{@code TypeChunk}: encoded column chunks are decoded and accumulated until at least
 *       {@code numOfRows} rows have been collected or no more data is available, so a chunk
 *       may hold more than {@code numOfRows} rows.
 *   <li>otherwise: a self-describing block (column count, row count, then per column a name,
 *       a type name and the data) is decoded; {@code numOfRows} is not used on this path.
 * </ul>
 *
 * @param req TiDAGRequest built
 * @param regionTasks a list of RegionTask, each of which contains a task on a single region
 * @param session TiSession
 * @param numOfRows requested number of rows per returned chunk (see the per-encoding notes
 *     above)
 * @return a DAGIterator to be processed
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
    TiDAGRequest dagRequest = req.copy();
    return new DAGIterator<TiChunk>(dagRequest.buildTableScan(), regionTasks, session, SchemaInfer.create(dagRequest), dagRequest.getPushDownType(), dagRequest.getStoreType(), dagRequest.getStartTs().getVersion()) {

        @Override
        public TiChunk next() {
            DataType[] dataTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
            // TODO tiColumnarBatch is meant to be reused in the entire data loading process.
            if (this.encodeType == EncodeType.TypeDefault) {
                // Row encoding: read rows individually until the batch is full or data runs out.
                Row[] rows = new Row[numOfRows];
                int count = 0;
                for (int i = 0; i < rows.length && hasNext(); i++) {
                    rows[i] = rowReader.readRow(dataTypes);
                    count += 1;
                }
                // Every column vector is a view over the same row array; only the first
                // `count` entries of `rows` were filled.
                TiRowColumnVector[] columnarVectors = new TiRowColumnVector[dataTypes.length];
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new TiRowColumnVector(dataTypes[i], i, rows, count);
                }
                return new TiChunk(columnarVectors);
            } else if (this.encodeType == EncodeType.TypeChunk) {
                TiColumnVector[] columnarVectors = new TiColumnVector[dataTypes.length];
                // One list of decoded sub-vectors per column.
                List<List<TiChunkColumnVector>> childColumnVectors = new ArrayList<>();
                for (int i = 0; i < dataTypes.length; i++) {
                    childColumnVectors.add(new ArrayList<>());
                }
                int count = 0;
                // TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
                while (count < numOfRows && hasNext()) {
                    for (int i = 0; i < dataTypes.length; i++) {
                        childColumnVectors.get(i).add(dataTypes[i].decodeChunkColumn(dataInput));
                    }
                    // The row count of the piece just decoded is taken from the first column.
                    int size = childColumnVectors.get(0).size();
                    count += childColumnVectors.get(0).get(size - 1).numOfRows();
                    // left data should be trashed.
                    dataInput = new CodecDataInput(new byte[0]);
                }
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new BatchedTiChunkColumnVector(childColumnVectors.get(i), count);
                }
                return new TiChunk(columnarVectors);
            } else {
                // Sizes arrive as variable-length longs; Math.toIntExact fails fast with an
                // ArithmeticException instead of silently truncating an oversized value.
                // reading column count
                int colCount = Math.toIntExact(IntegerCodec.readUVarLong(dataInput));
                // Row count of this block (named so it does not shadow the numOfRows parameter).
                int rowCount = Math.toIntExact(IntegerCodec.readUVarLong(dataInput));
                TiColumnVector[] columnVectors = new TiColumnVector[colCount];
                for (int columnIdx = 0; columnIdx < colCount; columnIdx++) {
                    // reading column name; the bytes are consumed but the name is not used
                    int nameLength = Math.toIntExact(IntegerCodec.readUVarLong(dataInput));
                    for (int i = 0; i < nameLength; i++) {
                        dataInput.readByte();
                    }
                    // reading type name
                    int typeNameLength = Math.toIntExact(IntegerCodec.readUVarLong(dataInput));
                    byte[] utf8Bytes = new byte[typeNameLength];
                    for (int i = 0; i < typeNameLength; i++) {
                        utf8Bytes[i] = dataInput.readByte();
                    }
                    String typeName = new String(utf8Bytes, StandardCharsets.UTF_8);
                    CHType type = CHTypeMapping.parseType(typeName);
                    columnVectors[columnIdx] = type.decode(dataInput, rowCount);
                    // TODO this is workaround to bypass nullable type
                }
                // Replace the input with an empty one now that the block has been decoded.
                dataInput = new CodecDataInput(new byte[0]);
                return new TiChunk(columnVectors);
            }
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) ArrayList(java.util.ArrayList) CHType(com.pingcap.tikv.columnar.datatypes.CHType) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) TiChunkColumnVector(com.pingcap.tikv.columnar.TiChunkColumnVector) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) CodecDataInput(com.pingcap.tikv.codec.CodecDataInput) DataType(com.pingcap.tikv.types.DataType) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.pingcap.tikv.row.Row) TiRowColumnVector(com.pingcap.tikv.columnar.TiRowColumnVector)

Aggregations

TiChunk (com.pingcap.tikv.columnar.TiChunk)2 TiDAGRequest (com.pingcap.tikv.meta.TiDAGRequest)2 Snapshot (com.pingcap.tikv.Snapshot)1 CodecDataInput (com.pingcap.tikv.codec.CodecDataInput)1 BatchedTiChunkColumnVector (com.pingcap.tikv.columnar.BatchedTiChunkColumnVector)1 TiChunkColumnVector (com.pingcap.tikv.columnar.TiChunkColumnVector)1 TiColumnVector (com.pingcap.tikv.columnar.TiColumnVector)1 TiRowColumnVector (com.pingcap.tikv.columnar.TiRowColumnVector)1 CHType (com.pingcap.tikv.columnar.datatypes.CHType)1 Row (com.pingcap.tikv.row.Row)1 DataType (com.pingcap.tikv.types.DataType)1 MySQLType (com.pingcap.tikv.types.MySQLType)1 ColumnMetaData (com.qlangtech.tis.plugin.ds.ColumnMetaData)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1