Search in sources :

Example 21 with RowType

use of org.apache.flink.table.types.logical.RowType in project flink by apache.

the class CsvRowDataSerDeSchemaTest method testSerializationWithTypesMismatch.

/**
 * Verifies that serialization fails with a message naming field {@code f2} when the row
 * carries the string {@code "Test"} in the position the schema declares as {@code INT}.
 */
@Test
public void testSerializationWithTypesMismatch() {
    DataType dataType = ROW(FIELD("f0", STRING()), FIELD("f1", INT()), FIELD("f2", INT()));
    RowType rowType = (RowType) dataType.getLogicalType();
    CsvRowDataSerializationSchema.Builder serSchemaBuilder = new CsvRowDataSerializationSchema.Builder(rowType);
    RowData rowData = rowData("Test", 1, "Test");
    String errorMessage = "Fail to serialize at field: f2.";
    Throwable thrown = null;
    try {
        serialize(serSchemaBuilder, rowData);
    } catch (Throwable t) {
        thrown = t;
    }
    // The "nothing was thrown" check must live outside the try block: the AssertionError
    // raised by fail() is itself a Throwable whose message embeds errorMessage, so catching
    // it alongside the real failure would let the test pass without any serialization error.
    if (thrown == null) {
        fail("expecting exception message:" + errorMessage);
    }
    assertThat(thrown, FlinkMatchers.containsMessage(errorMessage));
}
Also used : GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType) StringData.fromString(org.apache.flink.table.data.StringData.fromString) Test(org.junit.Test)

Example 22 with RowType

use of org.apache.flink.table.types.logical.RowType in project flink by apache.

the class ArrowUtils method createArrowFieldWriterForArray.

/**
 * Selects the {@code forArray} writer that corresponds to the concrete class of the given
 * Arrow vector.
 *
 * <p>The checks run in a fixed order and the first matching vector class wins. List vectors
 * recurse into this method for their data vector; struct vectors build one row-level writer
 * per child via {@code createArrowFieldWriterForRow}.
 *
 * @param vector the Arrow vector the returned writer will write into
 * @param fieldType the logical type paired with the vector; it is cast to the timestamp,
 *     array or row type in the corresponding branches
 * @return a writer for the vector
 * @throws UnsupportedOperationException if no branch matches the vector
 */
private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forArray(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    }

    // All four time resolutions share a single writer.
    boolean isTimeVector =
            vector instanceof TimeSecVector
                    || vector instanceof TimeMilliVector
                    || vector instanceof TimeMicroVector
                    || vector instanceof TimeNanoVector;
    if (isTimeVector) {
        return TimeWriter.forArray(vector);
    }

    // Only timestamp vectors whose Arrow type carries no timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        int precision =
                fieldType instanceof LocalZonedTimestampType
                        ? ((LocalZonedTimestampType) fieldType).getPrecision()
                        : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forArray(vector, precision);
    }

    if (vector instanceof ListVector) {
        ListVector listVector = (ListVector) vector;
        LogicalType elementType = ((ArrayType) fieldType).getElementType();
        ArrowFieldWriter<ArrayData> elementWriter =
                createArrowFieldWriterForArray(listVector.getDataVector(), elementType);
        return ArrayWriter.forArray(listVector, elementWriter);
    }

    if (vector instanceof StructVector) {
        RowType rowType = (RowType) fieldType;
        StructVector structVector = (StructVector) vector;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[rowType.getFieldCount()];
        for (int pos = 0; pos < childWriters.length; pos++) {
            childWriters[pos] =
                    createArrowFieldWriterForRow(structVector.getVectorById(pos), rowType.getTypeAt(pos));
        }
        return RowWriter.forArray(structVector, childWriters);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) BigIntVector(org.apache.arrow.vector.BigIntVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 23 with RowType

use of org.apache.flink.table.types.logical.RowType in project flink by apache.

the class ArrowUtils method collectAsPandasDataFrame.

/**
 * Convert Flink table to Pandas DataFrame.
 *
 * <p>The table is executed and its rows are converted to {@link RowData}, written through an
 * Arrow writer in batches and handed out as byte arrays. Rows of a table that is not
 * append-only are first passed through {@code filterOutRetractRows}.
 *
 * <p>The Arrow root and the child allocator are closed by the returned iterator once a call
 * to {@code next()} leaves no further rows to read.
 *
 * @param table the table to execute and collect
 * @param maxArrowBatchSize the maximum number of rows written per call to {@code next()}
 * @return an iterator whose {@code next()} returns the bytes accumulated for one batch
 * @throws Exception if preparing the Arrow stream or executing the table fails
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
    checkArrowUsable();
    BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    arrowStreamWriter.start();
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter arrowWriter = createRowDataArrowWriter(root, rowType);
    Iterator<RowData> convertedResults = new Iterator<RowData>() {

        // Built on the first call to next() and reused afterwards, so the converter is no
        // longer re-created for every row. Creating it lazily (rather than up front) keeps
        // any lookup failure inside next(), exactly where it surfaced before.
        private DataFormatConverters.DataFormatConverter converter;

        @Override
        public boolean hasNext() {
            return appendOnlyResults.hasNext();
        }

        @Override
        public RowData next() {
            if (converter == null) {
                converter = DataFormatConverters.getConverterForDataType(defaultRowDataType);
            }
            return (RowData) converter.toInternal(appendOnlyResults.next());
        }
    };
    return new CustomIterator<byte[]>() {

        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                // Write at most maxArrowBatchSize rows into the current batch.
                int i = 0;
                while (convertedResults.hasNext() && i < maxArrowBatchSize) {
                    i++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                // Clear per-batch state so the next call starts from an empty buffer.
                arrowWriter.reset();
                baos.reset();
                // Release the Arrow resources once the last row has been consumed.
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) RowType(org.apache.flink.table.types.logical.RowType) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) BufferAllocator(org.apache.arrow.memory.BufferAllocator) RowData(org.apache.flink.table.data.RowData) DataFormatConverters(org.apache.flink.table.data.util.DataFormatConverters) Iterator(java.util.Iterator) DataType(org.apache.flink.table.types.DataType) Row(org.apache.flink.types.Row)

Example 24 with RowType

use of org.apache.flink.table.types.logical.RowType in project flink by apache.

the class ArrowUtils method toArrowField.

/**
 * Builds the Arrow {@link Field} for a logical type, recursing into nested types.
 *
 * <p>An array type yields a single child named {@code "element"}; a row type yields one
 * child per row field, named after that field. Every other type gets {@code null} children.
 *
 * @param fieldName the name given to the resulting Arrow field
 * @param logicalType the logical type to translate
 * @return the Arrow field describing {@code logicalType}
 */
private static Field toArrowField(String fieldName, LogicalType logicalType) {
    boolean nullable = logicalType.isNullable();
    ArrowType arrowType = logicalType.accept(LogicalTypeToArrowTypeConverter.INSTANCE);
    FieldType arrowFieldType = new FieldType(nullable, arrowType, null);

    List<Field> nested = null;
    if (logicalType instanceof ArrayType) {
        LogicalType elementType = ((ArrayType) logicalType).getElementType();
        nested = Collections.singletonList(toArrowField("element", elementType));
    } else if (logicalType instanceof RowType) {
        RowType row = (RowType) logicalType;
        nested = new ArrayList<>(row.getFieldCount());
        for (RowType.RowField rowField : row.getFields()) {
            Field child = toArrowField(rowField.getName(), rowField.getType());
            nested.add(child);
        }
    }
    return new Field(fieldName, arrowFieldType, nested);
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) Field(org.apache.arrow.vector.types.pojo.Field) ArrayList(java.util.ArrayList) RowType(org.apache.flink.table.types.logical.RowType) FieldType(org.apache.arrow.vector.types.pojo.FieldType)

Example 25 with RowType

use of org.apache.flink.table.types.logical.RowType in project flink by apache.

the class KinesisDynamicTableFactoryTest method testGoodTableSinkCopyForPartitionedTable.

/**
 * Checks that a sink created through {@code createTableSink} for a table partitioned by
 * {@code name} and {@code curr_id} equals a copy of a hand-built sink, and that
 * {@code copy()} hands back a different instance than the sink it was called on.
 */
@Test
public void testGoodTableSinkCopyForPartitionedTable() {
    ResolvedSchema schema = defaultSinkSchema();
    DataType physicalType = schema.toPhysicalRowDataType();
    Map<String, String> options = defaultSinkTableOptions().build();
    List<String> partitionKeys = Arrays.asList("name", "curr_id");

    // Sink under test, obtained via createTableSink.
    KinesisDynamicSink actualSink =
            (KinesisDynamicSink) createTableSink(schema, partitionKeys, options);

    // Reference sink, assembled directly with the sink builder.
    KinesisDynamicSink expectedSink =
            (KinesisDynamicSink)
                    new KinesisDynamicSink.KinesisDynamicTableSinkBuilder()
                            .setConsumedDataType(physicalType)
                            .setStream(STREAM_NAME)
                            .setKinesisClientProperties(defaultProducerProperties())
                            .setEncodingFormat(new TestFormatFactory.EncodingFormatMock(","))
                            .setPartitioner(
                                    new RowDataFieldsKinesisPartitionKeyGenerator(
                                            (RowType) physicalType.getLogicalType(), partitionKeys))
                            .build();

    Assertions.assertThat(actualSink).isEqualTo(expectedSink.copy());
    Assertions.assertThat(expectedSink).isNotSameAs(expectedSink.copy());
}
Also used : RowDataFieldsKinesisPartitionKeyGenerator(org.apache.flink.connector.kinesis.table.RowDataFieldsKinesisPartitionKeyGenerator) KinesisDynamicSink(org.apache.flink.connector.kinesis.table.KinesisDynamicSink) DataType(org.apache.flink.table.types.DataType) RowType(org.apache.flink.table.types.logical.RowType) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) Test(org.junit.Test)

Aggregations

RowType (org.apache.flink.table.types.logical.RowType)212 RowData (org.apache.flink.table.data.RowData)108 LogicalType (org.apache.flink.table.types.logical.LogicalType)59 DataType (org.apache.flink.table.types.DataType)57 Transformation (org.apache.flink.api.dag.Transformation)50 ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge)46 TableException (org.apache.flink.table.api.TableException)37 Test (org.junit.Test)36 GenericRowData (org.apache.flink.table.data.GenericRowData)33 ArrayList (java.util.ArrayList)28 List (java.util.List)28 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)26 RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector)25 CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext)22 TableConfig (org.apache.flink.table.api.TableConfig)19 ArrayType (org.apache.flink.table.types.logical.ArrayType)19 TimestampType (org.apache.flink.table.types.logical.TimestampType)19 DecimalType (org.apache.flink.table.types.logical.DecimalType)17 Collections (java.util.Collections)16 AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList)16