Search in sources :

Example 6 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.

the class HiveTableSinkITCase method testStreamingWriteWithCustomPartitionCommitPolicy.

/**
 * Writes a {@code FiniteTestSource} built from five rows into a Hive sink table partitioned by
 * {@code (d, e)} and configured with the {@code metastore,custom} partition commit policies, then
 * asserts that every expected partition path was reported by {@code TestCustomCommitPolicy}.
 *
 * <p>The database {@code db1} is dropped in a {@code finally} block regardless of the outcome.
 *
 * @param customPartitionCommitPolicyClassName value written into the sink table's
 *     {@code SINK_PARTITION_COMMIT_POLICY_CLASS} table property
 * @throws Exception if any SQL statement or the insert job fails
 */
private void testStreamingWriteWithCustomPartitionCommitPolicy(String customPartitionCommitPolicyClassName) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(100);
    // bound the number of restarts so that a failing job cannot restart forever
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 1_000));

    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env);
    tEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
    tEnv.useCatalog(hiveCatalog.getName());
    tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    try {
        tEnv.executeSql("create database db1");
        tEnv.useDatabase("db1");

        // source: five rows sharing d=2020-05-03, each with a distinct value for e
        List<Row> rows =
                Arrays.asList(
                        Row.of(1, "a", "b", "2020-05-03", "7"),
                        Row.of(2, "p", "q", "2020-05-03", "8"),
                        Row.of(3, "x", "y", "2020-05-03", "9"),
                        Row.of(4, "x", "y", "2020-05-03", "10"),
                        Row.of(5, "x", "y", "2020-05-03", "11"));
        RowTypeInfo rowType =
                new RowTypeInfo(Types.INT, Types.STRING, Types.STRING, Types.STRING, Types.STRING);
        DataStream<Row> input = env.addSource(new FiniteTestSource<>(rows), rowType);
        tEnv.createTemporaryView("my_table", input, $("a"), $("b"), $("c"), $("d"), $("e"));

        // sink DDL, issued while the Hive dialect is still active
        String sinkDdl =
                "create external table sink_table (a int,b string,c string"
                        + ") "
                        + "partitioned by (d string,e string) "
                        + " stored as textfile"
                        + " TBLPROPERTIES ("
                        + "'" + SINK_PARTITION_COMMIT_DELAY.key() + "'='1h',"
                        + "'" + SINK_PARTITION_COMMIT_POLICY_KIND.key() + "'='metastore,custom',"
                        + "'" + SINK_PARTITION_COMMIT_POLICY_CLASS.key() + "'='" + customPartitionCommitPolicyClassName + "'"
                        + ")";
        tEnv.executeSql(sinkDdl);

        // the Hive dialect only works with Hive tables at the moment, so switch back to the
        // default dialect before querying the temporary view
        tEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);
        tEnv.sqlQuery("select * from my_table")
                .executeInsert("sink_table")
                .await();

        // collect the partition paths recorded by the custom commit policy
        Set<String> committedPaths = TestCustomCommitPolicy.getCommittedPartitionPathsAndReset();

        ObjectPath sinkTablePath = ObjectPath.fromString("db1.sink_table");
        String sinkLocation = hiveCatalog.getHiveTable(sinkTablePath).getSd().getLocation();
        String base = URI.create(sinkLocation).getPath();

        for (String partitionKV : Lists.newArrayList("e=7", "e=8", "e=9", "e=10", "e=11")) {
            Path datePartition = new Path(base, "d=2020-05-03");
            String partitionPath = new Path(datePartition, partitionKV).toString();
            Assert.assertTrue(
                    "Partition(d=2020-05-03, " + partitionKV + ") is not committed successfully",
                    committedPaths.contains(partitionPath));
        }
    } finally {
        tEnv.executeSql("drop database if exists db1 cascade");
    }
}
Also used : Path(org.apache.flink.core.fs.Path) ObjectPath(org.apache.flink.table.catalog.ObjectPath) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo)

Example 7 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.

the class PythonOperatorChainingOptimizerTest method testChainingMultipleOperators.

/**
 * Builds the pipeline source -> keyed process (f1) -> process (f2) -> process (f3), passes all
 * four transformations to {@code PythonOperatorChainingOptimizer.optimize}, and expects two
 * transformations back, the second of which wraps a {@code PythonKeyedProcessOperator}.
 */
@Test
public void testChainingMultipleOperators() {
    final int parallelism = 2;

    PythonKeyedProcessOperator<?> keyedOp =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> firstProcessOp =
            createProcessOperator("f2", Types.STRING(), Types.LONG());
    PythonProcessOperator<?, ?> secondProcessOp =
            createProcessOperator("f3", Types.LONG(), Types.INT());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedStage =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp, keyedOp.getProducedType(), parallelism);
    Transformation<?> firstProcessStage =
            new OneInputTransformation(
                    keyedStage,
                    "process",
                    firstProcessOp,
                    firstProcessOp.getProducedType(),
                    parallelism);
    Transformation<?> secondProcessStage =
            new OneInputTransformation(
                    firstProcessStage,
                    "process",
                    secondProcessOp,
                    secondProcessOp.getProducedType(),
                    parallelism);

    List<Transformation<?>> pipeline = new ArrayList<>();
    pipeline.add(source);
    pipeline.add(keyedStage);
    pipeline.add(firstProcessStage);
    pipeline.add(secondProcessStage);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(pipeline);
    assertEquals(2, result.size());

    // the transformation at index 1 must span from the source's output type to f3's output type
    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) result.get(1);
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(secondProcessOp.getProducedType(), chained.getOutputType());

    OneInputStreamOperator<?, ?> chainedOp = chained.getOperator();
    assertTrue(chainedOp instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedProcessOperator<?>) chainedOp).getPythonFunctionInfo(),
            "f3",
            "f2",
            "f1");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 8 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.

the class PythonOperatorChainingOptimizerTest method testContinuousKeyedOperators.

/**
 * Builds the pipeline source -> keyed process (f1) -> keyed process (f2) and expects
 * {@code PythonOperatorChainingOptimizer.optimize} to return three transformations, with the two
 * keyed process transformations unchanged at indices 1 and 2.
 */
@Test
public void testContinuousKeyedOperators() {
    final int parallelism = 2;

    PythonKeyedProcessOperator<?> firstKeyedOp =
            createKeyedProcessOperator(
                    "f1",
                    new RowTypeInfo(Types.INT(), Types.INT()),
                    new RowTypeInfo(Types.INT(), Types.INT()));
    PythonKeyedProcessOperator<?> secondKeyedOp =
            createKeyedProcessOperator(
                    "f2", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> firstKeyedStage =
            new OneInputTransformation(
                    source,
                    "KeyedProcess1",
                    firstKeyedOp,
                    firstKeyedOp.getProducedType(),
                    parallelism);
    OneInputTransformation<?, ?> secondKeyedStage =
            new OneInputTransformation(
                    firstKeyedStage,
                    "KeyedProcess2",
                    secondKeyedOp,
                    secondKeyedOp.getProducedType(),
                    parallelism);

    List<Transformation<?>> pipeline = new ArrayList<>();
    pipeline.add(source);
    pipeline.add(firstKeyedStage);
    pipeline.add(secondKeyedStage);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(pipeline);

    // the same number of transformations comes back, and both keyed stages are returned as given
    assertEquals(3, result.size());
    assertEquals(firstKeyedStage, result.get(1));
    assertEquals(secondKeyedStage, result.get(2));
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 9 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.

the class PythonOperatorChainingOptimizerTest method testSingleTransformation.

/**
 * Builds the pipeline source -> keyed process (f1) -> process (f2) -> process (f3) but passes
 * only the last transformation to {@code PythonOperatorChainingOptimizer.optimize}. Expects two
 * transformations back, the first of which wraps a {@code PythonKeyedProcessOperator}.
 */
@Test
public void testSingleTransformation() {
    final int parallelism = 2;

    PythonKeyedProcessOperator<?> keyedOp =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> firstProcessOp =
            createProcessOperator("f2", Types.STRING(), Types.LONG());
    PythonProcessOperator<?, ?> secondProcessOp =
            createProcessOperator("f3", Types.LONG(), Types.INT());

    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedStage =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp, keyedOp.getProducedType(), parallelism);
    Transformation<?> firstProcessStage =
            new OneInputTransformation(
                    keyedStage,
                    "process",
                    firstProcessOp,
                    firstProcessOp.getProducedType(),
                    parallelism);
    Transformation<?> secondProcessStage =
            new OneInputTransformation(
                    firstProcessStage,
                    "process",
                    secondProcessOp,
                    secondProcessOp.getProducedType(),
                    parallelism);

    // only the tail of the pipeline is passed in; the upstream stages are not listed
    List<Transformation<?>> pipeline = new ArrayList<>();
    pipeline.add(secondProcessStage);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(pipeline);
    assertEquals(2, result.size());

    // the transformation at index 0 must span from the source's output type to f3's output type
    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) result.get(0);
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(secondProcessOp.getProducedType(), chained.getOutputType());

    OneInputStreamOperator<?, ?> chainedOp = chained.getOperator();
    assertTrue(chainedOp instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedProcessOperator<?>) chainedOp).getPythonFunctionInfo(),
            "f3",
            "f2",
            "f1");
}
Also used : SourceTransformation(org.apache.flink.streaming.api.transformations.SourceTransformation) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Transformation(org.apache.flink.api.dag.Transformation) PythonKeyedProcessOperator(org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator) ArrayList(java.util.ArrayList) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) OneInputTransformation(org.apache.flink.streaming.api.transformations.OneInputTransformation) Test(org.junit.Test)

Example 10 with RowTypeInfo

Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.

the class RowComparatorWithManyFieldsTests method setUp.

/**
 * Initializes the shared {@code typeInfo} with a {@link RowTypeInfo} of {@code numberOfFields}
 * fields, each typed as {@code BasicTypeInfo.STRING_TYPE_INFO}.
 *
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
@BeforeClass
public static void setUp() throws Exception {
    TypeInformation<?>[] stringFields = new TypeInformation[numberOfFields];
    // every field of the row uses the same string type information
    java.util.Arrays.fill(stringFields, BasicTypeInfo.STRING_TYPE_INFO);
    typeInfo = new RowTypeInfo(stringFields);
}
Also used : RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) BeforeClass(org.junit.BeforeClass)

Aggregations

RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo)50 Test (org.junit.Test)34 Row (org.apache.flink.types.Row)32 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)26 Configuration (org.apache.flink.configuration.Configuration)16 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)15 ArrayList (java.util.ArrayList)10 Transformation (org.apache.flink.api.dag.Transformation)8 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)8 SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation)8 TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation)8 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6 PythonKeyedProcessOperator (org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator)6 IOException (java.io.IOException)4 MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo)4 File (java.io.File)3 FileOutputStream (java.io.FileOutputStream)3 OutputStreamWriter (java.io.OutputStreamWriter)3 LocalDateTime (java.time.LocalDateTime)3 PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo)3