Search in sources:

Example 1 with TextFileFormat

Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat in the project incubator-inlong by Apache.

From the class HiveSinkITCase, method prepareSinkSchema.

/**
 * Builds the {@link HiveSinkInfo} describing the sink table used by this test:
 * four fields (a MILLIS timestamp, an int and two strings), a time partition on
 * the first field plus a field partition on the second, written as
 * tab-separated text files.
 *
 * @return the fully configured {@link HiveSinkInfo} for the integration test
 */
private HiveSinkInfo prepareSinkSchema() {
    final FieldInfo f1 = new FieldInfo(fieldName1, new TimestampFormatInfo("MILLIS"));
    final FieldInfo f2 = new FieldInfo(fieldName2, IntFormatInfo.INSTANCE);
    final FieldInfo f3 = new FieldInfo(fieldName3, StringFormatInfo.INSTANCE);
    final FieldInfo f4 = new FieldInfo(fieldName4, StringFormatInfo.INSTANCE);
    final HiveTimePartitionInfo timePartition = new HiveTimePartitionInfo(f1.getName(), timePartitionFormat);
    final HiveFieldPartitionInfo fieldPartition = new HiveFieldPartitionInfo(f2.getName());
    return new HiveSinkInfo(
            new FieldInfo[] { f1, f2, f3, f4 },
            hiveMetastoreUrl,
            hiveDb,
            hiveTable,
            hiveUsername,
            hivePassword,
            dfsSchema + hdfsDataDir,
            new HivePartitionInfo[] { timePartition, fieldPartition },
            // Char literal '\t' instead of the roundabout "\t".charAt(0).
            new TextFileFormat('\t'));
}
Also used : HiveFieldPartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFieldPartitionInfo) TimestampFormatInfo(org.apache.inlong.sort.formats.common.TimestampFormatInfo) HiveTimePartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveTimePartitionInfo) HiveSinkInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo)

Example 2 with TextFileFormat

Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat in the project incubator-inlong by Apache.

From the class TextRowWriterTest, method testWrite.

/**
 * Verifies that {@link TextRowWriter} renders each row as one comma-separated
 * text line in the output file.
 */
@Test
public void testWrite() throws IOException {
    final File outputFile = temporaryFolder.newFile("test.txt");
    final TextRowWriter writer = new TextRowWriter(
            new LocalDataOutputStream(outputFile),
            new TextFileFormat(','),
            new Configuration(),
            new LogicalType[] { new CharType(), new IntType() });
    writer.addElement(Row.of("zhangsan", 1));
    writer.addElement(Row.of("lisi", 2));
    writer.flush();
    writer.finish();
    // Read the produced file back as UTF-8 text lines.
    final List<String> lines = new ArrayList<>();
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()), StandardCharsets.UTF_8))) {
        for (String current = reader.readLine(); current != null; current = reader.readLine()) {
            lines.add(current);
        }
    }
    assertEquals(2, lines.size());
    assertEquals("zhangsan,1", lines.get(0));
    assertEquals("lisi,2", lines.get(1));
}
Also used : LocalDataOutputStream(org.apache.flink.core.fs.local.LocalDataOutputStream) Configuration(org.apache.inlong.sort.configuration.Configuration) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) FileInputStream(java.io.FileInputStream) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) BufferedReader(java.io.BufferedReader) CharType(org.apache.flink.table.types.logical.CharType) File(java.io.File) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat) Test(org.junit.Test)

Example 3 with TextFileFormat

Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat in the project incubator-inlong by Apache.

From the class TextRowWriterTest, method testWriteGzip.

/**
 * Verifies that GZIP-compressed writer output is byte-identical to the
 * checked-in reference file {@code src/test/resources/testGzip.gz}.
 */
@Test
public void testWriteGzip() throws IOException {
    final File outputFile = temporaryFolder.newFile("test.gz");
    final TextRowWriter writer = new TextRowWriter(
            new LocalDataOutputStream(outputFile),
            new TextFileFormat(',', CompressionType.GZIP),
            new Configuration(),
            new LogicalType[] { new CharType(), new IntType() });
    writer.addElement(Row.of("zhangsan", 1));
    writer.addElement(Row.of("lisi", 2));
    writer.flush();
    writer.finish();
    assertTrue(isSameFile(outputFile.getAbsolutePath(), "src/test/resources/testGzip.gz"));
}
Also used : LocalDataOutputStream(org.apache.flink.core.fs.local.LocalDataOutputStream) Configuration(org.apache.inlong.sort.configuration.Configuration) CharType(org.apache.flink.table.types.logical.CharType) File(java.io.File) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) Test(org.junit.Test)

Example 4 with TextFileFormat

Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat in the project incubator-inlong by Apache.

From the class TextRowWriterTest, method testWriteLZO.

/**
 * Verifies LZO-compressed output by decompressing it with {@link LzopCodec}
 * and comparing the recovered text lines with what was written.
 */
@Test
public void testWriteLZO() throws IOException {
    File lzoFile = temporaryFolder.newFile("test.lzo");
    TextRowWriter textRowWriter = new TextRowWriter(
            new LocalDataOutputStream(lzoFile),
            new TextFileFormat(',', CompressionType.LZO),
            new Configuration(),
            new LogicalType[] { new CharType(), new IntType() });
    textRowWriter.addElement(Row.of("zhangsan", 1));
    textRowWriter.addElement(Row.of("lisi", 2));
    textRowWriter.finish();
    LzopCodec lzopCodec = new LzopCodec();
    final List<String> results = new ArrayList<>();
    // Build the entire decompression chain inside try-with-resources so the
    // underlying FileInputStream is closed even if createInputStream throws
    // (previously it was opened outside the try block and could leak).
    try (FileInputStream fileIn = new FileInputStream(lzoFile.getAbsolutePath());
            CompressionInputStream compressedIn = lzopCodec.createInputStream(fileIn);
            BufferedReader br = new BufferedReader(new InputStreamReader(compressedIn, StandardCharsets.UTF_8))) {
        String line;
        while ((line = br.readLine()) != null) {
            results.add(line);
        }
    }
    assertEquals(2, results.size());
    assertEquals("zhangsan,1", results.get(0));
    assertEquals("lisi,2", results.get(1));
}
Also used : LocalDataOutputStream(org.apache.flink.core.fs.local.LocalDataOutputStream) Configuration(org.apache.inlong.sort.configuration.Configuration) InputStreamReader(java.io.InputStreamReader) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ArrayList(java.util.ArrayList) FileInputStream(java.io.FileInputStream) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) LzopCodec(io.airlift.compress.lzo.LzopCodec) BufferedReader(java.io.BufferedReader) CharType(org.apache.flink.table.types.logical.CharType) File(java.io.File) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat) Test(org.junit.Test)

Example 5 with TextFileFormat

Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat in the project incubator-inlong by Apache.

From the class JdbcHivePartitionTool, method main.

/**
 * Command-line entry point that commits a single Hive partition via JDBC.
 *
 * <p>Recognized arguments: {@code --partition_<i>_name} / {@code --partition_<i>_value}
 * for i in [0, 10), plus {@code --database}, {@code --table},
 * {@code --metastore_address}, {@code --username}, {@code --password} and
 * {@code --root_path}.
 *
 * @param args CLI arguments parsed with {@code ParameterTool.fromArgs}
 * @throws Exception if the partition commit or the committer shutdown fails
 */
public static void main(String[] args) throws Exception {
    final Configuration config = ParameterTool.fromArgs(args).getConfiguration();
    final List<HivePartitionInfo> partitions = new ArrayList<>();
    final List<String> partitionValues = new ArrayList<>();
    // Collect up to 10 partition name/value pairs; absent indices are skipped.
    // A missing value defaults to the empty string.
    for (int i = 0; i < 10; i++) {
        String partitionName = config.getString("partition_" + i + "_name", null);
        if (partitionName != null) {
            partitions.add(new HiveFieldPartitionInfo(partitionName));
            partitionValues.add(config.getString("partition_" + i + "_value", ""));
        }
    }
    final String database = config.getString("database", null);
    final String table = config.getString("table", null);
    HiveSinkInfo hiveSinkInfo = new HiveSinkInfo(
            // No field schema is needed for a partition-only commit.
            new FieldInfo[0],
            config.getString("metastore_address", null),
            database,
            table,
            config.getString("username", null),
            config.getString("password", null),
            config.getString("root_path", null),
            partitions.toArray(new HivePartitionInfo[0]),
            // Char literal '\t' instead of the roundabout "\t".charAt(0).
            new TextFileFormat('\t'));
    JdbcHivePartitionCommitPolicy committer = new JdbcHivePartitionCommitPolicy(config, hiveSinkInfo);
    try {
        committer.commit(new Context() {

            @Override
            public String databaseName() {
                return database;
            }

            @Override
            public String tableName() {
                return table;
            }

            @Override
            public HivePartition partition() {
                // Pair each configured partition field with its value, preserving order.
                HivePartition hivePartition = new HivePartition();
                List<Tuple2<String, String>> partitionPairs = new ArrayList<>();
                for (int i = 0; i < partitions.size(); i++) {
                    partitionPairs.add(Tuple2.of(partitions.get(i).getFieldName(), partitionValues.get(i)));
                }
                // noinspection unchecked
                hivePartition.setPartitions(partitionPairs.toArray(new Tuple2[0]));
                return hivePartition;
            }
        });
    } finally {
        // Always release JDBC resources, even when the commit fails.
        committer.close();
    }
}
Also used : Context(org.apache.inlong.sort.flink.hive.partition.PartitionCommitPolicy.Context) Configuration(org.apache.inlong.sort.configuration.Configuration) ArrayList(java.util.ArrayList) HivePartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HivePartitionInfo) HiveFieldPartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFieldPartitionInfo) HiveSinkInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo) List(java.util.List) ArrayList(java.util.ArrayList) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat)

Aggregations

TextFileFormat (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat)7 Configuration (org.apache.inlong.sort.configuration.Configuration)4 File (java.io.File)3 ArrayList (java.util.ArrayList)3 LocalDataOutputStream (org.apache.flink.core.fs.local.LocalDataOutputStream)3 BigIntType (org.apache.flink.table.types.logical.BigIntType)3 CharType (org.apache.flink.table.types.logical.CharType)3 IntType (org.apache.flink.table.types.logical.IntType)3 HiveSinkInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 InputStreamReader (java.io.InputStreamReader)2 FieldInfo (org.apache.inlong.sort.protocol.FieldInfo)2 HiveFieldPartitionInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFieldPartitionInfo)2 HivePartitionInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HivePartitionInfo)2 LzopCodec (io.airlift.compress.lzo.LzopCodec)1 HashMap (java.util.HashMap)1 List (java.util.List)1 LogicalType (org.apache.flink.table.types.logical.LogicalType)1