Search in sources :

Example 1 with CHType

use of com.pingcap.tikv.columnar.datatypes.CHType in project tispark by pingcap.

The method getTiChunkIterator of the class CoprocessorIterator.

/**
 * Builds an iterator that decodes coprocessor responses into {@link TiChunk} batches.
 *
 * <p>The request is copied first; the copy's table-scan request, push-down type, store type and
 * start timestamp are handed to the underlying {@code DAGIterator}. Each call to {@code next()}
 * produces one chunk, decoded according to the iterator's encode type:
 *
 * <ul>
 *   <li>{@code EncodeType.TypeDefault}: up to {@code numOfRows} rows are read through the row
 *       reader and exposed as row-backed column vectors;
 *   <li>{@code EncodeType.TypeChunk}: chunk columns are decoded from successive inputs until at
 *       least {@code numOfRows} rows have been gathered or the iterator is exhausted;
 *   <li>any other encode type: a self-describing block is read (column count, row count, then
 *       for every column its name, its type name and its data).
 * </ul>
 *
 * @param req the DAG request to execute; it is copied and the original is not used afterwards
 * @param regionTasks a list of RegionTask, each of which contains a task on a single region
 * @param session TiSession
 * @param numOfRows the number of rows to gather per chunk for the row and chunk encodings; the
 *     block encoding carries its own row count and ignores this value
 * @return an iterator over the decoded chunks
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
    TiDAGRequest dagRequest = req.copy();
    return new DAGIterator<TiChunk>(dagRequest.buildTableScan(), regionTasks, session, SchemaInfer.create(dagRequest), dagRequest.getPushDownType(), dagRequest.getStoreType(), dagRequest.getStartTs().getVersion()) {

        @Override
        public TiChunk next() {
            DataType[] dataTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
            // TODO tiColumnarBatch is meant to be reused in the entire data loading process.
            if (this.encodeType == EncodeType.TypeDefault) {
                return decodeRowsAsChunk(dataTypes);
            } else if (this.encodeType == EncodeType.TypeChunk) {
                return decodeChunkColumnsAsChunk(dataTypes);
            } else {
                return decodeBlockAsChunk();
            }
        }

        /** Reads up to {@code numOfRows} rows and wraps them in one row-backed vector per column. */
        private TiChunk decodeRowsAsChunk(DataType[] dataTypes) {
            Row[] rows = new Row[numOfRows];
            int count = 0;
            while (count < rows.length && hasNext()) {
                rows[count] = rowReader.readRow(dataTypes);
                count++;
            }
            TiRowColumnVector[] columnarVectors = new TiRowColumnVector[dataTypes.length];
            for (int i = 0; i < dataTypes.length; i++) {
                // Every vector shares the same row array; only the first `count` entries are filled.
                columnarVectors[i] = new TiRowColumnVector(dataTypes[i], i, rows, count);
            }
            return new TiChunk(columnarVectors);
        }

        /** Decodes chunk-encoded columns until at least {@code numOfRows} rows are gathered. */
        private TiChunk decodeChunkColumnsAsChunk(DataType[] dataTypes) {
            TiColumnVector[] columnarVectors = new TiColumnVector[dataTypes.length];
            List<List<TiChunkColumnVector>> childColumnVectors = new ArrayList<>();
            for (int i = 0; i < dataTypes.length; i++) {
                childColumnVectors.add(new ArrayList<>());
            }
            int count = 0;
            // TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
            while (count < numOfRows && hasNext()) {
                for (int i = 0; i < dataTypes.length; i++) {
                    childColumnVectors.get(i).add(dataTypes[i].decodeChunkColumn(dataInput));
                }
                // The row count of this batch is taken from the vector just appended to column 0.
                List<TiChunkColumnVector> firstColumn = childColumnVectors.get(0);
                count += firstColumn.get(firstColumn.size() - 1).numOfRows();
                // left data should be trashed.
                dataInput = new CodecDataInput(new byte[0]);
            }
            for (int i = 0; i < dataTypes.length; i++) {
                columnarVectors[i] = new BatchedTiChunkColumnVector(childColumnVectors.get(i), count);
            }
            return new TiChunk(columnarVectors);
        }

        /**
         * Decodes a self-describing block: column count, row count, then for each column a
         * length-prefixed name (skipped), a length-prefixed UTF-8 type name and the column data.
         */
        private TiChunk decodeBlockAsChunk() {
            // reading column count
            long colCount = IntegerCodec.readUVarLong(dataInput);
            // The block states its own row count; the numOfRows argument is not used here.
            long blockRowCount = IntegerCodec.readUVarLong(dataInput);
            TiColumnVector[] columnVectors = new TiColumnVector[(int) colCount];
            for (int columnIdx = 0; columnIdx < colCount; columnIdx++) {
                // reading column name (the bytes are consumed and discarded)
                long length = IntegerCodec.readUVarLong(dataInput);
                for (int i = 0; i < length; i++) {
                    dataInput.readByte();
                }
                // reading type name
                length = IntegerCodec.readUVarLong(dataInput);
                byte[] utf8Bytes = new byte[(int) length];
                for (int i = 0; i < length; i++) {
                    utf8Bytes[i] = dataInput.readByte();
                }
                String typeName = new String(utf8Bytes, StandardCharsets.UTF_8);
                CHType type = CHTypeMapping.parseType(typeName);
                columnVectors[columnIdx] = type.decode(dataInput, (int) blockRowCount);
                // TODO this is workaround to bypass nullable type
            }
            dataInput = new CodecDataInput(new byte[0]);
            return new TiChunk(columnVectors);
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) ArrayList(java.util.ArrayList) CHType(com.pingcap.tikv.columnar.datatypes.CHType) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) TiChunkColumnVector(com.pingcap.tikv.columnar.TiChunkColumnVector) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) CodecDataInput(com.pingcap.tikv.codec.CodecDataInput) DataType(com.pingcap.tikv.types.DataType) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.pingcap.tikv.row.Row) TiRowColumnVector(com.pingcap.tikv.columnar.TiRowColumnVector)

Example 2 with CHType

use of com.pingcap.tikv.columnar.datatypes.CHType in project tispark by pingcap.

The method parseType of the class CHTypeMapping.

/**
 * Parses a CH type name into a new {@link CHType} instance.
 *
 * <p>Surrounding whitespace is ignored. Plain names ({@code UInt8} ... {@code String}) are matched
 * exactly; parameterised names are matched by prefix:
 *
 * <ul>
 *   <li>{@code FixedString(n)} - {@code n} must be an integer;
 *   <li>{@code MyDateTime...} - any suffix is accepted and ignored;
 *   <li>{@code Decimal(p,s)} - the first two comma-separated arguments are the precision and the
 *       scale; whitespace around them is tolerated;
 *   <li>{@code Nullable(T)} - {@code T} is parsed recursively and the result is marked nullable.
 * </ul>
 *
 * @param typeName the type name to parse
 * @return a newly created type object for {@code typeName}
 * @throws UnsupportedOperationException if {@code typeName} is null or blank, if its arguments
 *     cannot be parsed, or if the type is not recognised
 */
public static CHType parseType(String typeName) {
    if (typeName == null) {
        throw new UnsupportedOperationException("Empty CH type!");
    }
    // Trim before testing for emptiness so that blank input is reported as empty rather than as
    // an unsupported type with an invisible name.
    String name = typeName.trim();
    if (name.isEmpty()) {
        throw new UnsupportedOperationException("Empty CH type!");
    }
    switch (name) {
        case "UInt8":
            return new CHTypeUInt8();
        case "UInt16":
            return new CHTypeUInt16();
        case "UInt32":
            return new CHTypeUInt32();
        case "UInt64":
            return new CHTypeUInt64();
        case "Int8":
            return new CHTypeInt8();
        case "Int16":
            return new CHTypeInt16();
        case "Int32":
            return new CHTypeInt32();
        case "Int64":
            return new CHTypeInt64();
        case "Float32":
            return new CHTypeFloat32();
        case "Float64":
            return new CHTypeFloat64();
        case "Date":
            return new CHTypeDate();
        case "DateTime":
            return new CHTypeDateTime();
        case "MyDateTime":
            return new CHTypeMyDateTime();
        case "MyDate":
            return new CHTypeMyDate();
        case "String":
            return new CHTypeString();
        default:
            // Not a plain name; fall through to the prefix-based checks below.
            break;
    }
    if (name.startsWith("FixedString")) {
        String remain = stripTypeWrapper(name, "FixedString");
        try {
            int length = Integer.parseInt(remain.trim());
            return new CHTypeFixedString(length);
        } catch (NumberFormatException e) {
            throw new UnsupportedOperationException("Illegal CH type: " + name, e);
        }
    }
    if (name.startsWith("MyDateTime")) {
        // Whatever follows the prefix is ignored.
        return new CHTypeMyDateTime();
    }
    if (name.startsWith("Decimal")) {
        String remain = stripTypeWrapper(name, "Decimal");
        try {
            String[] args = remain.split(",");
            int precision = Integer.parseInt(args[0].trim());
            int scale = Integer.parseInt(args[1].trim());
            return new CHTypeDecimal(precision, scale);
        } catch (Exception e) {
            // Covers malformed numbers, a missing scale argument, and anything thrown while
            // constructing the type.
            throw new UnsupportedOperationException("Illegal CH type: " + name, e);
        }
    }
    if (name.startsWith("Nullable")) {
        CHType type = parseType(stripTypeWrapper(name, "Nullable"));
        type.setNullable(true);
        return type;
    }
    throw new UnsupportedOperationException("Unsupported CH type: " + name);
}

/**
 * Removes {@code prefix} from the start of {@code typeName} and then one leading "(" and one
 * trailing ")", leaving the text between the outermost parentheses.
 */
private static String stripTypeWrapper(String typeName, String prefix) {
    String remain = StringUtils.removeStart(typeName, prefix);
    return StringUtils.removeEnd(StringUtils.removeStart(remain, "("), ")");
}
Also used : CHTypeUInt16(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt16) CHTypeInt16(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt16) CHTypeDateTime(com.pingcap.tikv.columnar.datatypes.CHTypeDateTime) CHTypeString(com.pingcap.tikv.columnar.datatypes.CHTypeString) CHTypeUInt32(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt32) CHTypeUInt64(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt64) CHTypeFloat64(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeFloat64) CHTypeMyDateTime(com.pingcap.tikv.columnar.datatypes.CHTypeMyDateTime) CHTypeString(com.pingcap.tikv.columnar.datatypes.CHTypeString) CHTypeFixedString(com.pingcap.tikv.columnar.datatypes.CHTypeFixedString) CHType(com.pingcap.tikv.columnar.datatypes.CHType) CHTypeFloat32(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeFloat32) CHTypeMyDate(com.pingcap.tikv.columnar.datatypes.CHTypeMyDate) CHTypeFixedString(com.pingcap.tikv.columnar.datatypes.CHTypeFixedString) CHTypeInt8(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt8) CHTypeUInt8(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt8) CHTypeDate(com.pingcap.tikv.columnar.datatypes.CHTypeDate) CHTypeDecimal(com.pingcap.tikv.columnar.datatypes.CHTypeDecimal) CHTypeInt32(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt32) CHTypeInt64(com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt64)

Aggregations

CHType (com.pingcap.tikv.columnar.datatypes.CHType)2 CodecDataInput (com.pingcap.tikv.codec.CodecDataInput)1 BatchedTiChunkColumnVector (com.pingcap.tikv.columnar.BatchedTiChunkColumnVector)1 TiChunk (com.pingcap.tikv.columnar.TiChunk)1 TiChunkColumnVector (com.pingcap.tikv.columnar.TiChunkColumnVector)1 TiRowColumnVector (com.pingcap.tikv.columnar.TiRowColumnVector)1 CHTypeDate (com.pingcap.tikv.columnar.datatypes.CHTypeDate)1 CHTypeDateTime (com.pingcap.tikv.columnar.datatypes.CHTypeDateTime)1 CHTypeDecimal (com.pingcap.tikv.columnar.datatypes.CHTypeDecimal)1 CHTypeFixedString (com.pingcap.tikv.columnar.datatypes.CHTypeFixedString)1 CHTypeMyDate (com.pingcap.tikv.columnar.datatypes.CHTypeMyDate)1 CHTypeMyDateTime (com.pingcap.tikv.columnar.datatypes.CHTypeMyDateTime)1 CHTypeFloat32 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeFloat32)1 CHTypeFloat64 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeFloat64)1 CHTypeInt16 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt16)1 CHTypeInt32 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt32)1 CHTypeInt64 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt64)1 CHTypeInt8 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeInt8)1 CHTypeUInt16 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt16)1 CHTypeUInt32 (com.pingcap.tikv.columnar.datatypes.CHTypeNumber.CHTypeUInt32)1