Search in sources :

Example 1 with TiDAGRequest

use of com.pingcap.tikv.meta.TiDAGRequest in project plugins by qlangtech.

the class TiKVDataSourceDumper method startDump.

/**
 * Starts dumping the rows of this dumper's partition.
 *
 * <p>A DAG request is built for the target columns, a snapshot is created at the request's start
 * timestamp, and the partition's tasks are read chunk by chunk (1024 rows requested per chunk).
 * Each chunk is column oriented; the returned iterator flattens it into one map per row.
 *
 * <p>The returned iterator honours the {@link Iterator} contract: {@code hasNext()} has no
 * observable side effect on the row cursor (it may be called any number of times), chunks that
 * contain no rows are skipped, and {@code next()} throws
 * {@link java.util.NoSuchElementException} once the data is exhausted.
 *
 * @return an iterator over rows, each row keyed by {@code ColumnMetaData.getKey()}; columns whose
 *     value is NULL for a row are absent from that row's map
 */
@Override
public Iterator<Map<String, Object>> startDump() {
    this.tiSession = dsFactory.getTiSession();
    TiDAGRequest dagRequest = dsFactory.getTiDAGRequest(this.targetCols, tiSession, tab.tableInfo);
    Snapshot snapshot = tiSession.createSnapshot(dagRequest.getStartTs());
    // What we get back here are column vectors (one TiChunk holds a batch of rows, column-wise).
    Iterator<TiChunk> tiChunkIterator = snapshot.tableReadChunk(dagRequest, this.partition.tasks, 1024);
    return new Iterator<Map<String, Object>>() {

        // Chunk currently being drained; null before the first fetch and after exhaustion.
        private TiChunk chunk = null;

        // Number of rows held by the current chunk.
        private int numOfRows = 0;

        // Index, inside the current chunk, of the row the next call to next() will return.
        private int rowIndex = 0;

        @Override
        public boolean hasNext() {
            // Loop (rather than a single fetch) so that chunks without rows are skipped.
            while (chunk == null || rowIndex >= numOfRows) {
                if (!tiChunkIterator.hasNext()) {
                    chunk = null;
                    return false;
                }
                chunk = tiChunkIterator.next();
                if (chunk == null) {
                    throw new IllegalStateException("next TiChunk can not be null");
                }
                rowIndex = 0;
                numOfRows = chunk.numOfRows();
            }
            return true;
        }

        @Override
        public Map<String, Object> next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            // The cursor is advanced here, not in hasNext(), so hasNext() stays idempotent.
            final int currentRow = rowIndex++;
            Map<String, Object> row = new HashMap<>();
            for (int i = 0; i < targetCols.size(); i++) {
                TiColumnVector column = chunk.column(i);
                if (column.isNullAt(currentRow)) {
                    // NULL values are simply left out of the row map.
                    continue;
                }
                MySQLType colType = column.dataType().getType();
                ColumnMetaData columnMetaData = targetCols.get(i);
                if (colType == MySQLType.TypeVarchar || colType == MySQLType.TypeString || colType == MySQLType.TypeBlob) {
                    row.put(columnMetaData.getKey(), filter(column.getUTF8String(currentRow)));
                } else if (colType == MySQLType.TypeDate || colType == MySQLType.TypeNewDate) {
                    // FIXME date formatting: the value is an offset relative to 1970 and will
                    // probably have to be re-formatted depending on how it is consumed, see
                    // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-date
                    row.put(columnMetaData.getKey(), column.getLong(currentRow));
                } else if (colType == MySQLType.TypeTimestamp || colType == MySQLType.TypeDatetime) {
                    row.put(columnMetaData.getKey(), column.getLong(currentRow));
                } else {
                    row.put(columnMetaData.getKey(), column.getUTF8String(currentRow));
                }
            }
            return row;
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) MySQLType(com.pingcap.tikv.types.MySQLType) HashMap(java.util.HashMap) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) Snapshot(com.pingcap.tikv.Snapshot) TiColumnVector(com.pingcap.tikv.columnar.TiColumnVector) Iterator(java.util.Iterator) ColumnMetaData(com.qlangtech.tis.plugin.ds.ColumnMetaData)

Example 2 with TiDAGRequest

use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.

the class CoprocessorIterator method getTiChunkIterator.

/**
 * Build a DAGIterator from a TiDAGRequest and region tasks that yields rows batched as
 * {@link TiChunk}s.
 *
 * <p>When we are performing a scan request using coveringIndex, {@link
 * com.pingcap.tidb.tipb.IndexScan} should be used to read index rows. In other circumstances,
 * {@link com.pingcap.tidb.tipb.TableScan} is used to scan table rows.
 * NOTE(review): this method always calls {@code buildTableScan()} on a copy of the request;
 * whether that call covers the index-scan case cannot be confirmed from here.
 *
 * @param req TiDAGRequest built; it is copied, the copy is what the iterator is built from
 * @param regionTasks a list of RegionTask, each of which contains a task on a single region
 * @param session TiSession
 * @param numOfRows target number of rows per returned chunk for the {@code TypeDefault} and
 *     {@code TypeChunk} encodings; in the remaining branch the row count is read from the
 *     response data instead
 * @return a DAGIterator whose {@code next()} assembles and returns one TiChunk per call
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
    TiDAGRequest dagRequest = req.copy();
    return new DAGIterator<TiChunk>(dagRequest.buildTableScan(), regionTasks, session, SchemaInfer.create(dagRequest), dagRequest.getPushDownType(), dagRequest.getStoreType(), dagRequest.getStartTs().getVersion()) {

        @Override
        public TiChunk next() {
            DataType[] dataTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
            // TODO tiColumnarBatch is meant to be reused in the entire data loading process.
            if (this.encodeType == EncodeType.TypeDefault) {
                // Row encoding: read up to numOfRows rows one at a time; `count` is how many
                // were actually read, trailing slots of `rows` stay null when data runs out.
                Row[] rows = new Row[numOfRows];
                int count = 0;
                for (int i = 0; i < rows.length && hasNext(); i++) {
                    rows[i] = rowReader.readRow(dataTypes);
                    count += 1;
                }
                // Expose the row array column-wise: one vector per column index over the same rows.
                TiRowColumnVector[] columnarVectors = new TiRowColumnVector[dataTypes.length];
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new TiRowColumnVector(dataTypes[i], i, rows, count);
                }
                return new TiChunk(columnarVectors);
            } else if (this.encodeType == EncodeType.TypeChunk) {
                // Chunk encoding: for every column collect the decoded column vectors, then
                // wrap each column's list into a single batched vector.
                TiColumnVector[] columnarVectors = new TiColumnVector[dataTypes.length];
                List<List<TiChunkColumnVector>> childColumnVectors = new ArrayList<>();
                for (int i = 0; i < dataTypes.length; i++) {
                    childColumnVectors.add(new ArrayList<>());
                }
                int count = 0;
                // TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
                // Whole column vectors are appended per iteration, so `count` may end up
                // larger than numOfRows.
                while (count < numOfRows && hasNext()) {
                    for (int i = 0; i < dataTypes.length; i++) {
                        childColumnVectors.get(i).add(dataTypes[i].decodeChunkColumn(dataInput));
                    }
                    // Rows added this round = row count of the vector just appended to column 0.
                    int size = childColumnVectors.get(0).size();
                    count += childColumnVectors.get(0).get(size - 1).numOfRows();
                    // left data should be trashed.
                    dataInput = new CodecDataInput(new byte[0]);
                }
                for (int i = 0; i < dataTypes.length; i++) {
                    columnarVectors[i] = new BatchedTiChunkColumnVector(childColumnVectors.get(i), count);
                }
                return new TiChunk(columnarVectors);
            } else {
                // Any other encoding: the layout is self-describing, column and row counts are
                // read from the data itself.
                // reading column count
                long colCount = IntegerCodec.readUVarLong(dataInput);
                // NOTE: this local shadows the method parameter `numOfRows` within this branch.
                long numOfRows = IntegerCodec.readUVarLong(dataInput);
                TiColumnVector[] columnVectors = new TiColumnVector[(int) colCount];
                for (int columnIdx = 0; columnIdx < colCount; columnIdx++) {
                    // reading column name (length-prefixed; the bytes are consumed and discarded)
                    long length = IntegerCodec.readUVarLong(dataInput);
                    for (int i = 0; i < length; i++) {
                        dataInput.readByte();
                    }
                    // reading type name (length-prefixed UTF-8), used to pick the column decoder
                    length = IntegerCodec.readUVarLong(dataInput);
                    byte[] utf8Bytes = new byte[(int) length];
                    for (int i = 0; i < length; i++) {
                        utf8Bytes[i] = dataInput.readByte();
                    }
                    String typeName = new String(utf8Bytes, StandardCharsets.UTF_8);
                    CHType type = CHTypeMapping.parseType(typeName);
                    columnVectors[columnIdx] = type.decode(dataInput, (int) numOfRows);
                    // TODO this is a workaround to bypass nullable type
                }
                // Replace the input with an empty one once all columns have been decoded.
                dataInput = new CodecDataInput(new byte[0]);
                return new TiChunk(columnVectors);
            }
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) ArrayList(java.util.ArrayList) CHType(com.pingcap.tikv.columnar.datatypes.CHType) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) TiChunkColumnVector(com.pingcap.tikv.columnar.TiChunkColumnVector) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) CodecDataInput(com.pingcap.tikv.codec.CodecDataInput) DataType(com.pingcap.tikv.types.DataType) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.pingcap.tikv.row.Row) TiRowColumnVector(com.pingcap.tikv.columnar.TiRowColumnVector)

Example 3 with TiDAGRequest

use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.

the class SchemaInferTest method simpleSelectSchemaInferTest.

/**
 * Schema inference for a plain single-column projection must yield exactly one type, and that
 * type must be of the same class as {@code StringType.VARCHAR}.
 */
@Test
public void simpleSelectSchemaInferTest() {
    // Equivalent SQL: select name from t1;
    TiDAGRequest request = new TiDAGRequest(TiDAGRequest.PushDownType.NORMAL);
    request.addRequiredColumn(name);
    request.setTableInfo(table);
    request.setStartTs(ts);

    SchemaInfer inferredSchema = SchemaInfer.create(request);
    List<DataType> inferredTypes = inferredSchema.getTypes();

    assertEquals(1, inferredTypes.size());
    DataType onlyType = inferredTypes.get(0);
    assertEquals(StringType.VARCHAR.getClass(), onlyType.getClass());
}
Also used : DataType(com.pingcap.tikv.types.DataType) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) Test(org.junit.Test)

Example 4 with TiDAGRequest

use of com.pingcap.tikv.meta.TiDAGRequest in project tispark by pingcap.

the class SchemaInferTest method makeSelectDAGReq.

/**
 * Builds one aggregate DAG request per given group-by item.
 *
 * <p>Every request targets {@code table}, requires the {@code name} and {@code number} columns,
 * aggregates with {@code sum} and uses {@code ts} as its start timestamp; only the group-by item
 * differs between requests.
 *
 * @param byItems the group-by items, one request is produced for each, in the given order
 * @return the requests, in the same order as {@code byItems}
 */
private List<TiDAGRequest> makeSelectDAGReq(ByItem... byItems) {
    List<TiDAGRequest> requests = new ArrayList<>();
    for (int idx = 0; idx < byItems.length; idx++) {
        // Equivalent SQL: select sum(number) from t1 group by name;
        TiDAGRequest request = new TiDAGRequest(TiDAGRequest.PushDownType.NORMAL);
        request.setTableInfo(table);
        request.addRequiredColumn(name);
        request.addRequiredColumn(number);
        request.addAggregate(sum);
        request.getGroupByItems().add(byItems[idx]);
        request.setStartTs(ts);
        requests.add(request);
    }
    return requests;
}
Also used : ByItem(com.pingcap.tikv.expression.ByItem) ArrayList(java.util.ArrayList) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest)

Example 5 with TiDAGRequest

use of com.pingcap.tikv.meta.TiDAGRequest in project plugins by qlangtech.

the class TiKVDataSourceFactory method getDataDumpers.

/**
 * Creates the dumpers used to export the given table, one per TiKV partition.
 *
 * <p>The table is looked up in TiDB, a DAG request is built for the table's reflected columns and,
 * for every pruned physical id of that request, the partitions are created from a copy of the
 * request bound to that physical id. Dumpers are instantiated lazily, one per call to
 * {@code next()} on the iterator held by the returned {@link DataDumpers}.
 *
 * @param table the table to dump; its reflected columns must not be empty
 * @param regionId passed through unchanged to {@code createPartitions}
 * @return the number of partitions together with an iterator producing one dumper per partition;
 *     the iterator throws {@link NoSuchElementException} when advanced past the last partition
 * @throws IllegalStateException if the table has no reflected columns
 * @throws NullPointerException if the table cannot be found in TiDB
 */
public DataDumpers getDataDumpers(TISTable table, Optional<Long> regionId) {
    // target cols
    final List<ColumnMetaData> reflectCols = table.getReflectCols();
    if (CollectionUtils.isEmpty(reflectCols)) {
        throw new IllegalStateException("param reflectCols can not be null");
    }
    // Holder that carries the table info out of the openTiDB callback.
    final AtomicReference<TiTableInfoWrapper> tabRef = new AtomicReference<>();
    final List<TiPartition> parts = this.openTiDB((session, c, db) -> {
        TiTableInfo tiTable = c.getTable(db, table.getTableName());
        Objects.requireNonNull(tiTable, "table:" + table.getTableName() + " can not find relevant table in TiDB");
        tabRef.set(new TiTableInfoWrapper(tiTable));
        TiDAGRequest dagRequest = getTiDAGRequest(reflectCols, session, tiTable);
        List<Long> prunedPhysicalIds = dagRequest.getPrunedPhysicalIds();
        return prunedPhysicalIds.stream().flatMap((prunedPhysicalId) -> createPartitions(prunedPhysicalId, session, dagRequest.copyReqWithPhysicalId(prunedPhysicalId), regionId).stream()).collect(Collectors.toList());
    });
    final int splitCount = parts.size();
    final TiTableInfoWrapper tableInfo = Objects.requireNonNull(tabRef.get(), "instacne of TiTableInfo can not be null");
    // Delegate cursor handling to the list's own iterator: it keeps hasNext()/next() consistent
    // and throws NoSuchElementException when exhausted, as the Iterator contract demands.
    final Iterator<TiPartition> partIterator = parts.iterator();
    Iterator<IDataSourceDumper> dumpers = new Iterator<IDataSourceDumper>() {

        @Override
        public boolean hasNext() {
            return partIterator.hasNext();
        }

        @Override
        public IDataSourceDumper next() {
            return new TiKVDataSourceDumper(TiKVDataSourceFactory.this, partIterator.next(), tableInfo, reflectCols);
        }
    };
    return new DataDumpers(splitCount, dumpers);
}
Also used : TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) java.util(java.util) LoggerFactory(org.slf4j.LoggerFactory) FormField(com.qlangtech.tis.plugin.annotation.FormField) AtomicReference(java.util.concurrent.atomic.AtomicReference) Context(com.alibaba.citrus.turbine.Context) TiSession(com.pingcap.tikv.TiSession) CollectionUtils(org.apache.commons.collections.CollectionUtils) Lists(com.pingcap.com.google.common.collect.Lists) TiTableInfo(com.pingcap.tikv.meta.TiTableInfo) FormFieldType(com.qlangtech.tis.plugin.annotation.FormFieldType) Validator(com.qlangtech.tis.plugin.annotation.Validator) TISExtension(com.qlangtech.tis.extension.TISExtension) Logger(org.slf4j.Logger) TiDBInfo(com.pingcap.tikv.meta.TiDBInfo) RangeSplitter(com.pingcap.tikv.util.RangeSplitter) com.qlangtech.tis.plugin.ds(com.qlangtech.tis.plugin.ds) Field(java.lang.reflect.Field) Collectors(java.util.stream.Collectors) TiConfiguration(com.pingcap.tikv.TiConfiguration) Public(com.qlangtech.tis.annotation.Public) IControlMsgHandler(com.qlangtech.tis.runtime.module.misc.IControlMsgHandler) Catalog(com.pingcap.tikv.catalog.Catalog) Maps(com.pingcap.com.google.common.collect.Maps) Types(java.sql.Types) AtomicReference(java.util.concurrent.atomic.AtomicReference) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) TiTableInfo(com.pingcap.tikv.meta.TiTableInfo)

Aggregations

TiDAGRequest (com.pingcap.tikv.meta.TiDAGRequest)11 DataType (com.pingcap.tikv.types.DataType)5 Test (org.junit.Test)4 TiTableInfo (com.pingcap.tikv.meta.TiTableInfo)3 Row (com.pingcap.tikv.row.Row)3 ArrayList (java.util.ArrayList)3 TiChunk (com.pingcap.tikv.columnar.TiChunk)2 KeyRange (org.tikv.kvproto.Coprocessor.KeyRange)2 Context (com.alibaba.citrus.turbine.Context)1 Lists (com.pingcap.com.google.common.collect.Lists)1 Maps (com.pingcap.com.google.common.collect.Maps)1 MockServerTest (com.pingcap.tikv.MockServerTest)1 Snapshot (com.pingcap.tikv.Snapshot)1 TiConfiguration (com.pingcap.tikv.TiConfiguration)1 TiSession (com.pingcap.tikv.TiSession)1 Catalog (com.pingcap.tikv.catalog.Catalog)1 CodecDataInput (com.pingcap.tikv.codec.CodecDataInput)1 CodecDataOutput (com.pingcap.tikv.codec.CodecDataOutput)1 BatchedTiChunkColumnVector (com.pingcap.tikv.columnar.BatchedTiChunkColumnVector)1 TiChunkColumnVector (com.pingcap.tikv.columnar.TiChunkColumnVector)1