Search in sources:

Example 46 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class RowDataKinesisDeserializationSchema method deserialize.

@Override
public RowData deserialize(byte[] recordValue, String partitionKey, String seqNum, long approxArrivalTimestamp, String stream, String shardId) throws IOException {
    RowData physicalRow = physicalDeserializer.deserialize(recordValue);
    GenericRowData metadataRow = new GenericRowData(requestedMetadataFields.size());
    for (int i = 0; i < metadataRow.getArity(); i++) {
        Metadata metadataField = requestedMetadataFields.get(i);
        if (metadataField == Metadata.Timestamp) {
            metadataRow.setField(i, TimestampData.fromEpochMillis(approxArrivalTimestamp));
        } else if (metadataField == Metadata.SequenceNumber) {
            metadataRow.setField(i, StringData.fromString(seqNum));
        } else if (metadataField == Metadata.ShardId) {
            metadataRow.setField(i, StringData.fromString(shardId));
        } else {
            String msg = String.format("Unsupported metadata key %s", metadataField);
            // should never happen
            throw new RuntimeException(msg);
        }
    }
    return new JoinedRowData(physicalRow.getRowKind(), physicalRow, metadataRow);
}
Also used : RowData(org.apache.flink.table.data.RowData) GenericRowData(org.apache.flink.table.data.GenericRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData)
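
The detail worth noting here is that JoinedRowData stitches the physical row and the metadata row together without copying either side; field accesses are delegated to the backing rows. A minimal, self-contained sketch of that join (the class name and field values below are illustrative, not part of the connector):

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.data.utils.JoinedRowData;

public class JoinedRowDataSketch {
    public static void main(String[] args) {
        // Physical row: two columns, as if produced by the payload deserializer.
        GenericRowData physical = new GenericRowData(2);
        physical.setField(0, StringData.fromString("order-1"));
        physical.setField(1, 42L);

        // Metadata row: approximate arrival timestamp and shard id.
        GenericRowData metadata = new GenericRowData(2);
        metadata.setField(0, TimestampData.fromEpochMillis(1_000L));
        metadata.setField(1, StringData.fromString("shardId-000000000000"));

        // The joined view exposes all four columns as one RowData,
        // keeping the row kind of the physical row.
        RowData joined = new JoinedRowData(physical.getRowKind(), physical, metadata);
        System.out.println(joined.getString(0) + " / arity " + joined.getArity());
    }
}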

Example 47 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class ParquetColumnarRowInputFormatTest method testReadingSplit.

private int testReadingSplit(List<Integer> expected, Path path, long splitStart, long splitLength, long seekToRow) throws IOException {
    LogicalType[] fieldTypes = new LogicalType[] { new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DoubleType(), new TimestampType(9), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0) };
    ParquetColumnarRowInputFormat format = new ParquetColumnarRowInputFormat(new Configuration(), RowType.of(fieldTypes, new String[] { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14" }), null, 500, false, true);
    // validate java serialization
    try {
        InstantiationUtil.clone(format);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    FileStatus fileStatus = path.getFileSystem().getFileStatus(path);
    BulkFormat.Reader<RowData> reader = format.restoreReader(EMPTY_CONF, new FileSourceSplit("id", path, splitStart, splitLength, fileStatus.getModificationTime(), fileStatus.getLen(), new String[0], new CheckpointedPosition(CheckpointedPosition.NO_OFFSET, seekToRow)));
    AtomicInteger cnt = new AtomicInteger(0);
    final AtomicReference<RowData> previousRow = new AtomicReference<>();
    forEachRemaining(reader, row -> {
        if (previousRow.get() == null) {
            previousRow.set(row);
        } else {
            // ParquetColumnarRowInputFormat should only have one row instance.
            assertSame(previousRow.get(), row);
        }
        Integer v = expected.get(cnt.get());
        if (v == null) {
            assertTrue(row.isNullAt(0));
            assertTrue(row.isNullAt(1));
            assertTrue(row.isNullAt(2));
            assertTrue(row.isNullAt(3));
            assertTrue(row.isNullAt(4));
            assertTrue(row.isNullAt(5));
            assertTrue(row.isNullAt(6));
            assertTrue(row.isNullAt(7));
            assertTrue(row.isNullAt(8));
            assertTrue(row.isNullAt(9));
            assertTrue(row.isNullAt(10));
            assertTrue(row.isNullAt(11));
            assertTrue(row.isNullAt(12));
            assertTrue(row.isNullAt(13));
            assertTrue(row.isNullAt(14));
        } else {
            assertEquals("" + v, row.getString(0).toString());
            assertEquals(v % 2 == 0, row.getBoolean(1));
            assertEquals(v.byteValue(), row.getByte(2));
            assertEquals(v.shortValue(), row.getShort(3));
            assertEquals(v.intValue(), row.getInt(4));
            assertEquals(v.longValue(), row.getLong(5));
            assertEquals(v.floatValue(), row.getFloat(6), 0);
            assertEquals(v.doubleValue(), row.getDouble(7), 0);
            assertEquals(toDateTime(v), row.getTimestamp(8, 9).toLocalDateTime());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(9, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(10, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(11, 20, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(12, 5, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(13, 15, 0).toBigDecimal());
            assertEquals(BigDecimal.valueOf(v), row.getDecimal(14, 20, 0).toBigDecimal());
        }
        cnt.incrementAndGet();
    });
    return cnt.get();
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) RowData(org.apache.flink.table.data.RowData) CheckpointedPosition(org.apache.flink.connector.file.src.util.CheckpointedPosition) TimestampType(org.apache.flink.table.types.logical.TimestampType) VarCharType(org.apache.flink.table.types.logical.VarCharType) BooleanType(org.apache.flink.table.types.logical.BooleanType) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) DecimalType(org.apache.flink.table.types.logical.DecimalType) BulkFormat(org.apache.flink.connector.file.src.reader.BulkFormat)
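
The assertSame check is the key detail: the columnar reader returns the same RowData instance for every record, so any value kept across records must be copied out first. A minimal sketch of that aliasing hazard (the materialize helper is hypothetical and copies only the first column):

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;

public class ReusedRowSketch {
    // Copy the needed fields out of a possibly reused row.
    static RowData materialize(RowData reused) {
        GenericRowData copy = new GenericRowData(reused.getArity());
        copy.setRowKind(reused.getRowKind());
        copy.setField(0, reused.isNullAt(0) ? null : reused.getInt(0));
        return copy;
    }

    public static void main(String[] args) {
        GenericRowData reused = new GenericRowData(1);
        List<RowData> buffered = new ArrayList<>();
        for (int i = 0; i < 3; i++) {
            reused.setField(0, i);             // the reader overwrites this instance
            buffered.add(materialize(reused)); // buffering `reused` itself would alias
        }
        buffered.forEach(r -> System.out.println(r.getInt(0))); // prints 0, 1, 2
    }
}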

Example 48 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class OrcColumnarRowSplitReaderTest method testReadFileWithSelectFields.

@Test
public void testReadFileWithSelectFields() throws IOException {
    FileInputSplit[] splits = createSplits(testFileFlat, 4);
    long cnt = 0;
    long totalF0 = 0;
    Map<String, Object> partSpec = new HashMap<>();
    partSpec.put("f1", 1);
    partSpec.put("f3", 3L);
    partSpec.put("f5", "f5");
    partSpec.put("f8", BigDecimal.valueOf(5.333));
    partSpec.put("f13", "f13");
    // read all splits
    for (FileInputSplit split : splits) {
        try (OrcColumnarRowSplitReader reader = createReader(
                new int[] { 8, 1, 3, 0, 5, 2 },
                new DataType[] {
                    /* 0 */ DataTypes.INT(),
                    // part-1
                    DataTypes.INT(),
                    /* 2 */ DataTypes.STRING(),
                    // part-2
                    DataTypes.BIGINT(),
                    /* 4 */ DataTypes.STRING(),
                    // part-3
                    DataTypes.STRING(),
                    /* 6 */ DataTypes.STRING(),
                    /* 7 */ DataTypes.INT(),
                    // part-4
                    DataTypes.DECIMAL(10, 5),
                    /* 9 */ DataTypes.STRING(),
                    /* 11 */ DataTypes.INT(),
                    /* 12 */ DataTypes.INT(),
                    // part-5
                    DataTypes.STRING(),
                    /* 14 */ DataTypes.INT()
                },
                partSpec,
                split)) {
            // read and count all rows
            while (!reader.reachedEnd()) {
                RowData row = reader.nextRecord(null);
                // data values
                Assert.assertFalse(row.isNullAt(3));
                Assert.assertFalse(row.isNullAt(5));
                totalF0 += row.getInt(3);
                Assert.assertNotNull(row.getString(5).toString());
                // part values
                Assert.assertFalse(row.isNullAt(0));
                Assert.assertFalse(row.isNullAt(1));
                Assert.assertFalse(row.isNullAt(2));
                Assert.assertFalse(row.isNullAt(4));
                Assert.assertEquals(DecimalDataUtils.castFrom(5.333, 10, 5), row.getDecimal(0, 10, 5));
                Assert.assertEquals(1, row.getInt(1));
                Assert.assertEquals(3, row.getLong(2));
                Assert.assertEquals("f5", row.getString(4).toString());
                cnt++;
            }
        }
    }
    // check that all rows have been read
    assertEquals(1920800, cnt);
    assertEquals(1844737280400L, totalF0);
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) RowData(org.apache.flink.table.data.RowData) HashMap(java.util.HashMap) Test(org.junit.Test)
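
Note how the selected-field array reorders the produced row: position i in the returned RowData reads schema field selected[i], which is why f8 (the DECIMAL partition value) appears at position 0 and f0 at position 3. A trivial sketch of that mapping:

public class ProjectionSketch {
    public static void main(String[] args) {
        int[] selected = { 8, 1, 3, 0, 5, 2 }; // same projection as the test above
        for (int pos = 0; pos < selected.length; pos++) {
            // produced row position -> original schema field
            System.out.printf("row position %d reads schema field f%d%n", pos, selected[pos]);
        }
    }
}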

Example 49 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class OrcColumnarRowSplitReaderTest method testReadFileWithTypes.

@Test
public void testReadFileWithTypes() throws IOException {
    File folder = TEMPORARY_FOLDER.newFolder();
    String file = new File(folder, "testOrc").getPath();
    int rowSize = 1024;
    prepareReadFileWithTypes(file, rowSize);
    // second test read.
    FileInputSplit split = createSplits(new Path(file), 1)[0];
    int cnt = 0;
    Map<String, Object> partSpec = new HashMap<>();
    partSpec.put("f5", true);
    partSpec.put("f6", new Date(562423));
    partSpec.put("f7", LocalDateTime.of(1999, 1, 1, 1, 1));
    partSpec.put("f8", 6.6);
    partSpec.put("f9", null);
    partSpec.put("f10", null);
    partSpec.put("f11", null);
    partSpec.put("f12", null);
    partSpec.put("f13", null);
    try (OrcColumnarRowSplitReader reader = createReader(new int[] { 2, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }, new DataType[] { DataTypes.FLOAT(), DataTypes.DOUBLE(), DataTypes.TIMESTAMP(), DataTypes.TINYINT(), DataTypes.SMALLINT(), DataTypes.BOOLEAN(), DataTypes.DATE(), DataTypes.TIMESTAMP(), DataTypes.DOUBLE(), DataTypes.DOUBLE(), DataTypes.INT(), DataTypes.STRING(), DataTypes.TIMESTAMP(), DataTypes.DECIMAL(5, 3) }, partSpec, split)) {
        // read and count all rows
        while (!reader.reachedEnd()) {
            RowData row = reader.nextRecord(null);
            if (cnt == rowSize - 1) {
                Assert.assertTrue(row.isNullAt(0));
                Assert.assertTrue(row.isNullAt(1));
                Assert.assertTrue(row.isNullAt(2));
                Assert.assertTrue(row.isNullAt(3));
                Assert.assertTrue(row.isNullAt(4));
            } else {
                Assert.assertFalse(row.isNullAt(0));
                Assert.assertFalse(row.isNullAt(1));
                Assert.assertFalse(row.isNullAt(2));
                Assert.assertFalse(row.isNullAt(3));
                Assert.assertFalse(row.isNullAt(4));
                Assert.assertEquals(TimestampData.fromTimestamp(toTimestamp(cnt)), row.getTimestamp(0, 9));
                Assert.assertEquals(cnt, row.getFloat(1), 0);
                Assert.assertEquals(cnt, row.getDouble(2), 0);
                Assert.assertEquals((byte) cnt, row.getByte(3));
                Assert.assertEquals(cnt, row.getShort(4));
            }
            Assert.assertTrue(row.getBoolean(5));
            Assert.assertEquals(new Date(562423).toString(), toSQLDate(row.getInt(6)).toString());
            Assert.assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(7, 9).toLocalDateTime());
            Assert.assertEquals(6.6, row.getDouble(8), 0);
            Assert.assertTrue(row.isNullAt(9));
            Assert.assertTrue(row.isNullAt(10));
            Assert.assertTrue(row.isNullAt(11));
            Assert.assertTrue(row.isNullAt(12));
            Assert.assertTrue(row.isNullAt(13));
            cnt++;
        }
    }
    // check that all rows have been read
    assertEquals(rowSize, cnt);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) RowData(org.apache.flink.table.data.RowData) HashMap(java.util.HashMap) OrcFile(org.apache.orc.OrcFile) File(java.io.File) DateTimeUtils.toSQLDate(org.apache.flink.table.utils.DateTimeUtils.toSQLDate) Date(java.sql.Date) Test(org.junit.Test)
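
The timestamp assertion depends on TimestampData round-tripping exactly at the precision passed to getTimestamp(7, 9). A minimal sketch of that round trip:

import java.time.LocalDateTime;
import org.apache.flink.table.data.TimestampData;

public class TimestampRoundTripSketch {
    public static void main(String[] args) {
        LocalDateTime ldt = LocalDateTime.of(1999, 1, 1, 1, 1);
        TimestampData ts = TimestampData.fromLocalDateTime(ldt);
        // Converting back recovers the original value exactly.
        System.out.println(ts.toLocalDateTime().equals(ldt)); // prints true
    }
}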

Example 50 with RowData

use of org.apache.flink.table.data.RowData in project flink by apache.

the class PythonTableFunctionOperator method bufferInput.

@Override
public void bufferInput(RowData input) {
    // always copy the input RowData
    RowData forwardedFields = forwardedInputSerializer.copy(input);
    forwardedFields.setRowKind(input.getRowKind());
    forwardedInputQueue.add(forwardedFields);
}
Also used : RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) GenericRowData(org.apache.flink.table.data.GenericRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData)
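
The copy matters because the upstream operator may reuse the input row (compare Example 47). A minimal sketch of such a defensive copy using RowDataSerializer, assuming flink-table-runtime is on the classpath (the two-column layout is illustrative):

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.VarCharType;

public class CopyRowSketch {
    public static void main(String[] args) {
        RowDataSerializer serializer =
                new RowDataSerializer(new IntType(), new VarCharType(VarCharType.MAX_LENGTH));
        GenericRowData input = new GenericRowData(2);
        input.setField(0, 7);
        input.setField(1, StringData.fromString("x"));

        RowData copy = serializer.copy(input); // deep copy, safe to buffer
        copy.setRowKind(input.getRowKind());   // re-applied defensively, as above
        System.out.println(copy.getInt(0) + " " + copy.getString(1));
    }
}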

Aggregations

RowData (org.apache.flink.table.data.RowData) 602
Test (org.junit.Test) 201
GenericRowData (org.apache.flink.table.data.GenericRowData) 178
ArrayList (java.util.ArrayList) 109
RowType (org.apache.flink.table.types.logical.RowType) 105
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData) 90
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 84
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue) 72
Transformation (org.apache.flink.api.dag.Transformation) 70
Configuration (org.apache.flink.configuration.Configuration) 68
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData) 67
List (java.util.List) 65
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge) 54
DataType (org.apache.flink.table.types.DataType) 52
Map (java.util.Map) 42
LogicalType (org.apache.flink.table.types.logical.LogicalType) 41
TableException (org.apache.flink.table.api.TableException) 34
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation) 33
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector) 32
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState) 31