Search in sources :

Example 1 with Row

use of com.pingcap.tikv.row.Row in project tispark by pingcap.

the class CoprocessorIterator method getTiChunkIterator.

/**
 * Creates a DAGIterator that exposes the coprocessor response as columnar {@link TiChunk} batches.
 *
 * <p>The request is copied first and the DAG request is produced via {@code buildTableScan()} on
 * the copy. Each call to {@code next()} decodes one batch according to the iterator's encode type:
 *
 * <ul>
 *   <li>{@code TypeDefault}: at most {@code numOfRows} rows are read one at a time and wrapped in
 *       {@link TiRowColumnVector}s (one per column).
 *   <li>{@code TypeChunk}: chunk columns are decoded repeatedly until at least {@code numOfRows}
 *       rows have been accumulated or {@code hasNext()} reports no more data; the pieces of each
 *       column are wrapped in a {@link BatchedTiChunkColumnVector}.
 *   <li>any other encode type: a block is decoded that carries its own column count, row count,
 *       and per-column name and type name; {@code numOfRows} is not consulted in this branch.
 * </ul>
 *
 * @param req TiDAGRequest built
 * @param regionTasks a list of RegionTask, each containing a task on a single region
 * @param session TiSession
 * @param numOfRows batch size: the row capacity for {@code TypeDefault} and the accumulation
 *     threshold for {@code TypeChunk}
 * @return a DAGIterator yielding TiChunk batches
 */
public static CoprocessorIterator<TiChunk> getTiChunkIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session, int numOfRows) {
    TiDAGRequest dagRequest = req.copy();
    return new DAGIterator<TiChunk>(
            dagRequest.buildTableScan(),
            regionTasks,
            session,
            SchemaInfer.create(dagRequest),
            dagRequest.getPushDownType(),
            dagRequest.getStoreType(),
            dagRequest.getStartTs().getVersion()) {

        @Override
        public TiChunk next() {
            DataType[] columnTypes = this.schemaInfer.getTypes().toArray(new DataType[0]);
            // TODO tiColumnarBatch is meant to be reused in the entire data loading process.

            if (this.encodeType == EncodeType.TypeDefault) {
                // Row-encoded response: fill a fixed-size buffer, stopping early when data runs out.
                Row[] rowBuffer = new Row[numOfRows];
                int filled = 0;
                while (filled < rowBuffer.length && hasNext()) {
                    rowBuffer[filled] = rowReader.readRow(columnTypes);
                    filled++;
                }
                TiRowColumnVector[] rowVectors = new TiRowColumnVector[columnTypes.length];
                for (int col = 0; col < columnTypes.length; col++) {
                    rowVectors[col] = new TiRowColumnVector(columnTypes[col], col, rowBuffer, filled);
                }
                return new TiChunk(rowVectors);
            }

            if (this.encodeType == EncodeType.TypeChunk) {
                // Chunk-encoded response: collect the decoded pieces of every column separately.
                List<List<TiChunkColumnVector>> piecesPerColumn = new ArrayList<>();
                for (int col = 0; col < columnTypes.length; col++) {
                    piecesPerColumn.add(new ArrayList<>());
                }
                int decodedRows = 0;
                // TODO(Zhexuan Yang) we need control memory limit in case of out of memory error
                while (decodedRows < numOfRows && hasNext()) {
                    for (int col = 0; col < columnTypes.length; col++) {
                        piecesPerColumn.get(col).add(columnTypes[col].decodeChunkColumn(dataInput));
                    }
                    // The piece just appended to the first column tells how many rows were decoded.
                    List<TiChunkColumnVector> firstColumnPieces = piecesPerColumn.get(0);
                    decodedRows += firstColumnPieces.get(firstColumnPieces.size() - 1).numOfRows();
                    // left data should be trashed.
                    dataInput = new CodecDataInput(new byte[0]);
                }
                TiColumnVector[] batchedVectors = new TiColumnVector[columnTypes.length];
                for (int col = 0; col < columnTypes.length; col++) {
                    batchedVectors[col] = new BatchedTiChunkColumnVector(piecesPerColumn.get(col), decodedRows);
                }
                return new TiChunk(batchedVectors);
            }

            // Remaining encode type: the payload starts with the column count and the row count.
            long blockColumnCount = IntegerCodec.readUVarLong(dataInput);
            long blockRowCount = IntegerCodec.readUVarLong(dataInput);
            TiColumnVector[] blockVectors = new TiColumnVector[(int) blockColumnCount];
            for (int col = 0; col < blockColumnCount; col++) {
                // The column name is length-prefixed; it is read and discarded.
                long nameLength = IntegerCodec.readUVarLong(dataInput);
                for (int skipped = 0; skipped < nameLength; skipped++) {
                    dataInput.readByte();
                }
                // The type name is length-prefixed UTF-8 and selects the column decoder.
                long typeNameLength = IntegerCodec.readUVarLong(dataInput);
                byte[] typeNameBytes = new byte[(int) typeNameLength];
                for (int pos = 0; pos < typeNameLength; pos++) {
                    typeNameBytes[pos] = dataInput.readByte();
                }
                CHType columnType = CHTypeMapping.parseType(new String(typeNameBytes, StandardCharsets.UTF_8));
                blockVectors[col] = columnType.decode(dataInput, (int) blockRowCount);
                // TODO this is workaround to bypass nullable type
            }
            dataInput = new CodecDataInput(new byte[0]);
            return new TiChunk(blockVectors);
        }
    };
}
Also used : TiChunk(com.pingcap.tikv.columnar.TiChunk) BatchedTiChunkColumnVector(com.pingcap.tikv.columnar.BatchedTiChunkColumnVector) ArrayList(java.util.ArrayList) CHType(com.pingcap.tikv.columnar.datatypes.CHType) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) TiChunkColumnVector(com.pingcap.tikv.columnar.TiChunkColumnVector) CodecDataInput(com.pingcap.tikv.codec.CodecDataInput) DataType(com.pingcap.tikv.types.DataType) List(java.util.List) Row(com.pingcap.tikv.row.Row) TiRowColumnVector(com.pingcap.tikv.columnar.TiRowColumnVector)

Example 2 with Row

use of com.pingcap.tikv.row.Row in project tispark by pingcap.

the class ChunkIteratorTest method chunkTest.

/**
 * Pulls three raw chunks from a {@code ChunkIterator}, decodes each one as an int followed by a
 * varchar into consecutive slots of a six-column row, and checks the decoded values.
 */
@Test
public void chunkTest() {
    ChunkIterator<ByteString> chunkIterator = ChunkIterator.getRawBytesChunkIterator(chunks);
    DataType bytes = StringType.VARCHAR;
    DataType ints = IntegerType.INT;
    Row row = ObjectRowImpl.create(6);
    // Chunk k fills row slots 2k (int) and 2k + 1 (varchar).
    for (int chunkIdx = 0; chunkIdx < 3; chunkIdx++) {
        CodecDataInput cdi = new CodecDataInput(chunkIterator.next());
        setValueToRow(cdi, ints, 2 * chunkIdx, row);
        setValueToRow(cdi, bytes, 2 * chunkIdx + 1, row);
    }
    // JUnit convention: expected value first, actual value second, so failure messages read correctly.
    assertEquals(1L, row.getLong(0));
    assertEquals("a", row.getString(1));
    assertEquals(2L, row.getLong(2));
    assertEquals("b", row.getString(3));
    assertEquals(3L, row.getLong(4));
    assertEquals("c", row.getString(5));
}
Also used : ByteString(com.google.protobuf.ByteString) CodecDataInput(com.pingcap.tikv.codec.CodecDataInput) DataType(com.pingcap.tikv.types.DataType) Row(com.pingcap.tikv.row.Row) Test(org.junit.Test)

Example 3 with Row

use of com.pingcap.tikv.row.Row in project tispark by pingcap.

the class CoprocessorIterator method getHandleIterator.

/**
 * Creates a DAGIterator that yields row handles read from an index scan.
 *
 * <p>The request is copied, its encode type is forced to {@code TypeDefault}, and the DAG request
 * is produced via {@code buildIndexScan()}; callers are therefore expected to be performing an
 * IndexScan. Each handle is an {@link IntHandle} when there is exactly one handle column and its
 * type is {@code IntegerType.BIGINT}; otherwise a common handle is built from all handle columns.
 *
 * @param req TiDAGRequest built
 * @param regionTasks a list of RegionTask, each containing a task on a single region
 * @param session TiSession
 * @return a DAGIterator yielding handles
 */
public static CoprocessorIterator<Handle> getHandleIterator(TiDAGRequest req, List<RegionTask> regionTasks, TiSession session) {
    TiDAGRequest dagRequest = req.copy();
    // Only CoprocessorIterator<TiChunk> currently supports the TypeChunk and TypeCHBlock
    // encode types, so handle reading always uses TypeDefault.
    dagRequest.setEncodeType(EncodeType.TypeDefault);
    return new DAGIterator<Handle>(
            dagRequest.buildIndexScan(),
            regionTasks,
            session,
            SchemaInfer.create(dagRequest, true),
            dagRequest.getPushDownType(),
            dagRequest.getStoreType(),
            dagRequest.getStartTs().getVersion()) {

        @Override
        public Handle next() {
            Row handleRow = rowReader.readRow(handleTypes);
            // Extract every handle column using its declared type.
            Object[] handleValues = new Object[handleTypes.length];
            int col = 0;
            while (col < handleTypes.length) {
                handleValues[col] = handleRow.get(col, handleTypes[col]);
                col++;
            }
            boolean singleBigintColumn = handleTypes.length == 1 && handleTypes[0] == IntegerType.BIGINT;
            if (!singleBigintColumn) {
                return CommonHandle.newCommonHandle(handleTypes, handleValues);
            }
            return new IntHandle((long) handleValues[0]);
        }
    };
}
Also used : IntHandle(com.pingcap.tikv.key.IntHandle) Row(com.pingcap.tikv.row.Row) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest)

Example 4 with Row

use of com.pingcap.tikv.row.Row in project tispark by pingcap.

the class DAGIteratorTest method staleEpochTest.

/**
 * Registers a stale-epoch error for "key1" on the mock KV server together with an encoded
 * (666, "value1") value, then checks that a row iterator built for that key range still yields
 * the row with those two column values.
 */
@Test
public void staleEpochTest() {
    Metapb.Store store = Metapb.Store.newBuilder().setAddress(LOCAL_ADDR + ":" + port).setId(1).setState(Metapb.StoreState.Up).setVersion(Version.RESOLVE_LOCK_V4).build();
    TiTableInfo table = createTable();
    TiDAGRequest req = new TiDAGRequest(PushDownType.NORMAL);
    req.setTableInfo(table);
    req.addRequiredColumn(ColumnRef.create("c1", IntegerType.INT));
    req.addRequiredColumn(ColumnRef.create("c2", StringType.VARCHAR));
    req.setStartTs(new TiTimestamp(0, 1));
    List<KeyRange> keyRanges = ImmutableList.of(createByteStringRange(ByteString.copyFromUtf8("key1"), ByteString.copyFromUtf8("key4")));
    pdServer.addGetRegionResp(GrpcUtils.makeGetRegionResponse(pdServer.getClusterId(), region.getMeta()));
    pdServer.addGetStoreResp(GrpcUtils.makeGetStoreResponse(pdServer.getClusterId(), store));
    server.putError("key1", KVMockServer.STALE_EPOCH);
    CodecDataOutput cdo = new CodecDataOutput();
    IntegerCodec.writeLongFully(cdo, 666, false);
    // Use an explicit charset so the encoded bytes do not depend on the platform default.
    BytesCodec.writeBytesFully(cdo, "value1".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    server.put("key1", cdo.toByteString());
    List<RegionTask> tasks = ImmutableList.of(RegionTask.newInstance(region, store, keyRanges));
    CoprocessorIterator<Row> iter = CoprocessorIterator.getRowIterator(req, tasks, session);
    // Assert the real condition instead of comparing the constants true and false.
    org.junit.Assert.assertTrue("iterator has next should be true", iter.hasNext());
    Row r = iter.next();
    SchemaInfer infer = SchemaInfer.create(req);
    // JUnit convention: expected value first, actual value second.
    assertEquals(666L, r.get(0, infer.getType(0)));
    assertEquals("value1", r.get(1, infer.getType(1)));
}
Also used : TiTimestamp(com.pingcap.tikv.meta.TiTimestamp) KeyRange(org.tikv.kvproto.Coprocessor.KeyRange) TiDAGRequest(com.pingcap.tikv.meta.TiDAGRequest) RegionTask(com.pingcap.tikv.util.RangeSplitter.RegionTask) TiTableInfo(com.pingcap.tikv.meta.TiTableInfo) CodecDataOutput(com.pingcap.tikv.codec.CodecDataOutput) Row(com.pingcap.tikv.row.Row) SchemaInfer(com.pingcap.tikv.operation.SchemaInfer) Metapb(org.tikv.kvproto.Metapb) MockServerTest(com.pingcap.tikv.MockServerTest) Test(org.junit.Test)

Example 5 with Row

use of com.pingcap.tikv.row.Row in project tispark by pingcap.

the class TableCodecV1Test method testRowCodec.

/**
 * Round-trips {@code values} through {@code TableCodecV1.encodeRow} and
 * {@code TableCodecV1.decodeRow} and checks that every column comes back unchanged.
 */
@Test
public void testRowCodec() {
    // The round trip is repeated several times.
    // NOTE(review): the original comment reads "encodeRow refuse its cdo"; presumably this means
    // encodeRow reuses an internal CodecDataOutput across calls - confirm.
    for (int i = 0; i < 4; i++) {
        byte[] bytes = TableCodecV1.encodeRow(tblInfo.getColumns(), values, tblInfo.isPkHandle());
        // testing the correctness via decodeRow
        Row row = TableCodecV1.decodeRow(bytes, new IntHandle(1L), tblInfo);
        for (int j = 0; j < tblInfo.getColumns().size(); j++) {
            // Expected value first, actual second; the message identifies the failing column.
            assertEquals("column " + j, values[j], row.get(j, null));
        }
    }
}
Also used : IntHandle(com.pingcap.tikv.key.IntHandle) Row(com.pingcap.tikv.row.Row) Test(org.junit.Test)

Aggregations

Row (com.pingcap.tikv.row.Row)5 TiDAGRequest (com.pingcap.tikv.meta.TiDAGRequest)3 Test (org.junit.Test)3 CodecDataInput (com.pingcap.tikv.codec.CodecDataInput)2 IntHandle (com.pingcap.tikv.key.IntHandle)2 DataType (com.pingcap.tikv.types.DataType)2 ByteString (com.google.protobuf.ByteString)1 MockServerTest (com.pingcap.tikv.MockServerTest)1 CodecDataOutput (com.pingcap.tikv.codec.CodecDataOutput)1 BatchedTiChunkColumnVector (com.pingcap.tikv.columnar.BatchedTiChunkColumnVector)1 TiChunk (com.pingcap.tikv.columnar.TiChunk)1 TiChunkColumnVector (com.pingcap.tikv.columnar.TiChunkColumnVector)1 TiRowColumnVector (com.pingcap.tikv.columnar.TiRowColumnVector)1 CHType (com.pingcap.tikv.columnar.datatypes.CHType)1 TiTableInfo (com.pingcap.tikv.meta.TiTableInfo)1 TiTimestamp (com.pingcap.tikv.meta.TiTimestamp)1 SchemaInfer (com.pingcap.tikv.operation.SchemaInfer)1 RegionTask (com.pingcap.tikv.util.RangeSplitter.RegionTask)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1