Search in sources :

Example 1 with HdfsColMeta

use of com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta in project plugins by qlangtech.

The method createDataXWriter, defined in the class TestDataXHudiWriter:

/**
 * Builds a {@code HudiTest} fixture around a {@code DataXHudiWriter} that is wired to the
 * shared test HDFS / Hive / Spark infrastructure.
 *
 * @param fsFactory optional file-system factory override; when absent the default factory
 *                  from {@code HdfsFileSystemFactoryTestUtils} is used
 * @return a fixture bundling the configured writer, a table mapping and the selected tab
 */
private static HudiTest createDataXWriter(Optional<FileSystemFactory> fsFactory) {
    final DefaultSparkConnGetter sparkConnGetter = new DefaultSparkConnGetter();
    sparkConnGetter.name = "default";
    sparkConnGetter.master = "spark://sparkmaster:7077";
    DataXHudiWriter writer = new DataXHudiWriter() {

        @Override
        public Class<?> getOwnerClass() {
            return DataXHudiWriter.class;
        }

        @Override
        public IHiveConnGetter getHiveConnMeta() {
            return HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
        }

        @Override
        public ISparkConnGetter getSparkConnGetter() {
            return sparkConnGetter;
        }

        @Override
        public FileSystemFactory getFs() {
            // Fall back to the shared test factory only when no override was supplied.
            return fsFactory.orElseGet(HdfsFileSystemFactoryTestUtils::getFileSystemFactory);
        }
    };
    writer.template = DataXHudiWriter.getDftTemplate();
    writer.fsName = HdfsFileSystemFactoryTestUtils.FS_NAME;
    writer.setKey(new KeyedPluginStore.Key(null, HdfsFileSystemFactoryTestUtils.testDataXName.getName(), null));
    writer.tabType = HudiWriteTabType.COW.getValue();
    writer.batchOp = BatchOpMode.BULK_INSERT.getValue();
    writer.shuffleParallelism = 3;
    writer.partitionedBy = "pt";
    writer.writeMode = "insert";
    // Column metadata is loaded from the same JSON descriptor the writer asserts against.
    List<HdfsColMeta> colsMeta = HdfsColMeta.getColsMeta(Configuration.from(IOUtils.loadResourceFromClasspath(writer.getClass(), hudi_datax_writer_assert_without_optional)).getConfiguration(cfgPathParameter));
    HudiSelectedTab tab = new HudiSelectedTab() {

        @Override
        public List<ColMeta> getCols() {
            // Adapt each HdfsColMeta entry to the ColMeta shape HudiSelectedTab expects.
            return colsMeta.stream().map((c) -> {
                ColMeta col = new ColMeta();
                col.setName(c.getName());
                col.setPk(c.pk);
                col.setType(c.type);
                col.setNullable(c.nullable);
                return col;
            }).collect(Collectors.toList());
        }
    };
    tab.name = WriterTemplate.TAB_customer_order_relation;
    tab.partition = new OffPartition();
    tab.sourceOrderingField = "last_ver";
    tab.recordField = "customerregister_id";
    return new HudiTest(writer, WriterTemplate.createCustomer_order_relationTableMap(Optional.of(tab)), tab);
}
Also used : DefaultSparkConnGetter(com.qlangtech.tis.config.spark.impl.DefaultSparkConnGetter) IExecChainContext(com.qlangtech.tis.exec.IExecChainContext) TISCollectionUtils(com.qlangtech.tis.manage.common.TISCollectionUtils) BeforeClass(org.junit.BeforeClass) HudiWriter(com.alibaba.datax.plugin.writer.hudi.HudiWriter) TargetResName(com.qlangtech.tis.coredefine.module.action.TargetResName) Configuration(com.alibaba.datax.common.util.Configuration) ISparkConnGetter(com.qlangtech.tis.config.spark.ISparkConnGetter) Lists(com.google.common.collect.Lists) CenterResource(com.qlangtech.tis.manage.common.CenterResource) DataxProcessor(com.qlangtech.tis.datax.impl.DataxProcessor) HdfsPath(com.qlangtech.tis.hdfs.impl.HdfsPath) IHiveMetaStore(com.qlangtech.tis.config.hive.meta.IHiveMetaStore) TisUTF8(com.qlangtech.tis.manage.common.TisUTF8) DataxUtils(com.qlangtech.tis.offline.DataxUtils) IDataxProcessor(com.qlangtech.tis.datax.IDataxProcessor) KeyedPluginStore(com.qlangtech.tis.plugin.KeyedPluginStore) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem) OffPartition(com.qlangtech.tis.plugin.datax.hudi.partition.OffPartition) DataXCfgGenerator(com.qlangtech.tis.datax.impl.DataXCfgGenerator) HdfsFileSystemFactoryTestUtils(com.qlangtech.tis.hdfs.test.HdfsFileSystemFactoryTestUtils) IStreamIncrGenerateStrategy(com.qlangtech.tis.sql.parser.tuple.creator.IStreamIncrGenerateStrategy) IPath(com.qlangtech.tis.fs.IPath) EntityName(com.qlangtech.tis.sql.parser.tuple.creator.EntityName) DataxWriter(com.qlangtech.tis.datax.impl.DataxWriter) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) EasyMock(org.easymock.EasyMock) Collectors(java.util.stream.Collectors) FileSystemFactory(com.qlangtech.tis.offline.FileSystemFactory) File(java.io.File) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) IParamContext(com.qlangtech.tis.order.center.IParamContext) List(java.util.List) IOUtils(com.qlangtech.tis.extension.impl.IOUtils) Rule(org.junit.Rule) 
MDC(org.slf4j.MDC) HiveTable(com.qlangtech.tis.config.hive.meta.HiveTable) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter) Optional(java.util.Optional) Assert(org.junit.Assert) WriterTemplate(com.qlangtech.tis.plugin.common.WriterTemplate) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) OffPartition(com.qlangtech.tis.plugin.datax.hudi.partition.OffPartition) KeyedPluginStore(com.qlangtech.tis.plugin.KeyedPluginStore) DefaultSparkConnGetter(com.qlangtech.tis.config.spark.impl.DefaultSparkConnGetter)

Example 2 with HdfsColMeta

use of com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta in project plugins by qlangtech.

The method testCsvWrite, defined in the class TestDataXHudiWriterTask:

/**
 * Verifies the CSV write path of {@code TisDataXHudiWriter.Task}: a single randomly
 * generated record is pushed through the task and the resulting CSV bytes are captured
 * in an in-memory stream instead of HDFS.
 */
@Test
public void testCsvWrite() throws Exception {
    // One-element arrays let the anonymous Task's init() publish values back to the test.
    DefaultRecord[] record = new DefaultRecord[1];
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    TisDataXHudiWriter.Task task = new TisDataXHudiWriter.Task() {

        @Override
        public void init() {
            this.fileType = "csv";
            this.writerSliceConfig = Configuration.from(IOUtils.loadResourceFromClasspath(TestDataXHudiWriterTask.class, TestDataXHudiWriter.hudi_datax_writer_assert_without_optional)).getConfiguration("parameter");
            List<HdfsColMeta> colsMeta = HdfsColMeta.getColsMeta(this.writerSliceConfig);
            record[0] = new DefaultRecord();
            // Populate one column per schema entry with a random value of the matching type.
            for (HdfsColMeta col : colsMeta) {
                switch(col.csvType) {
                    case STRING:
                        record[0].addColumn(new StringColumn("{\"name\":\"" + RandomStringUtils.randomAlphanumeric(4) + "\"}"));
                        break;
                    case BOOLEAN:
                        // NOTE(review): no column is added for BOOLEAN, so the record may end
                        // up with fewer columns than the schema — confirm this is intended.
                        break;
                    case NUMBER:
                        record[0].addColumn(new LongColumn((long) (Math.random() * 1000)));
                        break;
                }
            }
        }

        @Override
        public void prepare() {
            super.prepare();
        }

        @Override
        protected OutputStream getOutputStream(Path targetPath) {
            // Capture the CSV output in memory rather than writing to the target path.
            return output;
        }
    };
    // Receiver hands out exactly one record, then signals end-of-stream with null.
    RecordReceiver records = new RecordReceiver() {

        int index = 0;

        @Override
        public Record getFromReader() {
            if (index++ < 1) {
                return record[0];
            }
            return null;
        }

        @Override
        public void shutdown() {
        }
    };
    task.init();
    task.prepare();
    task.startWrite(records);
    // Decode explicitly as UTF-8 instead of relying on the platform default charset.
    System.out.println(output.toString("UTF-8"));
}
Also used : Path(org.apache.hadoop.fs.Path) StringColumn(com.alibaba.datax.common.element.StringColumn) DefaultRecord(com.alibaba.datax.core.transport.record.DefaultRecord) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RecordReceiver(com.alibaba.datax.common.plugin.RecordReceiver) LongColumn(com.alibaba.datax.common.element.LongColumn) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) TisDataXHudiWriter(com.alibaba.datax.plugin.writer.hudi.TisDataXHudiWriter) Test(org.junit.Test)

Example 3 with HdfsColMeta

use of com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta in project plugins by qlangtech.

The method startTest, defined in the class CUDCDCTestSuit:

/**
 * Drives the CRUD CDC verification flow for a single table: resolves the selected tab
 * from the reader, mirrors its column metadata, wires the CDC factory to a Flink test
 * sink, and runs the table CRUD verification before cancelling the consumer.
 *
 * @param cdcFactory factory producing the CDC message listener under test
 * @param tabName    name of the table to verify; must be among the reader's selected tabs
 * @throws Exception if reader setup or the CRUD verification fails
 */
public void startTest(MQListenerFactory cdcFactory, String tabName) throws Exception {
    BasicDataXRdbmsReader dataxReader = createDataxReader(dataxName, tabName);
    List<SelectedTab> selectedTabs = dataxReader.getSelectedTabs();
    Optional<SelectedTab> firstSelectedTab = selectedTabs.stream().filter((t) -> tabName.equals(t.name)).findFirst();
    Assert.assertTrue("firstSelectedTab:" + tabName + " must be present", firstSelectedTab.isPresent());
    ISelectedTab tab = firstSelectedTab.get();
    this.cols = Lists.newArrayList();
    // Mirror the selected tab's column metadata into HdfsColMeta instances.
    for (ISelectedTab.ColMeta c : tab.getCols()) {
        cols.add(new HdfsColMeta(c.getName(), c.isNullable(), c.isPk(), c.getType()));
    }
    IResultRows consumerHandle = getTestBasicFlinkSourceHandle(tabName);
    cdcFactory.setConsumerHandle(consumerHandle.getConsumerHandle());
    IMQListener<JobExecutionResult> imqListener = cdcFactory.create();
    this.verfiyTableCrudProcess(tabName, dataxReader, tab, consumerHandle, imqListener);
    consumerHandle.cancel();
}
Also used : LineIterator(org.apache.commons.io.LineIterator) java.sql(java.sql) StringUtils(org.apache.commons.lang.StringUtils) java.util(java.util) TargetResName(com.qlangtech.tis.coredefine.module.action.TargetResName) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DTO(com.qlangtech.tis.realtime.transfer.DTO) BigDecimal(java.math.BigDecimal) Lists(com.google.common.collect.Lists) ByteArrayInputStream(java.io.ByteArrayInputStream) TisUTF8(com.qlangtech.tis.manage.common.TisUTF8) MQListenerFactory(com.qlangtech.tis.async.message.client.consumer.impl.MQListenerFactory) IDataxProcessor(com.qlangtech.tis.datax.IDataxProcessor) ParseException(java.text.ParseException) TISSinkFactory(com.qlangtech.tis.plugin.incr.TISSinkFactory) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) BasicDataXRdbmsReader(com.qlangtech.tis.plugin.datax.common.BasicDataXRdbmsReader) RdbmsReaderContext(com.qlangtech.tis.plugin.datax.common.RdbmsReaderContext) com.qlangtech.tis.plugin.ds(com.qlangtech.tis.plugin.ds) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) IOUtils(org.apache.commons.io.IOUtils) TestBasicFlinkSourceHandle(com.qlangtech.plugins.incr.flink.cdc.source.TestBasicFlinkSourceHandle) SelectedTab(com.qlangtech.tis.plugin.datax.SelectedTab) CloseableIterator(org.apache.flink.util.CloseableIterator) RowKind(org.apache.flink.types.RowKind) Row(org.apache.flink.types.Row) Assert(org.junit.Assert) IMQListener(com.qlangtech.tis.async.message.client.consumer.IMQListener) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) SelectedTab(com.qlangtech.tis.plugin.datax.SelectedTab) BasicDataXRdbmsReader(com.qlangtech.tis.plugin.datax.common.BasicDataXRdbmsReader)

Example 4 with HdfsColMeta

use of com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta in project plugins by qlangtech.

The method getValsList, defined in the class TestRow:

/**
 * Renders this row's column values as {@code name:value} strings, prefixed by the
 * row kind's short code (e.g. {@code +I}, {@code -U}).
 *
 * @param updateVal optional row kind override; when absent this row's own kind is used
 * @param keys      column metadata determining which values are rendered, in order
 * @param processor callback that formats each raw value before it is appended
 * @return mutable list whose first element is the row-kind code, followed by one
 *         {@code name:processedValue} entry per key
 * @throws Exception if the processor fails for any value
 */
public List<String> getValsList(Optional<RowKind> updateVal, List<HdfsColMeta> keys, ValProcessor processor) throws Exception {
    RowKind rowKind = updateVal.orElse(this.kind);
    List<String> valsEnum = Lists.newArrayList(rowKind.shortString());
    for (HdfsColMeta key : keys) {
        Object val = null;
        if (rowKind != RowKind.INSERT) {
            // Non-INSERT rows prefer an explicitly updated value when one was recorded.
            RowValsUpdate.UpdatedColVal uptColVal = (RowValsUpdate.UpdatedColVal) updateVals.getObj(key.getName());
            if (uptColVal != null) {
                val = uptColVal.updatedVal;
            }
        }
        if (val == null) {
            // Fall back to the original row value.
            val = vals.getObj(key.getName());
        }
        valsEnum.add(key.getName() + ":" + processor.process(vals, key.getName(), val));
    }
    return valsEnum;
}
Also used : HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) RowKind(org.apache.flink.types.RowKind)

Aggregations

HdfsColMeta (com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta)4 Lists (com.google.common.collect.Lists)2 TargetResName (com.qlangtech.tis.coredefine.module.action.TargetResName)2 IDataxProcessor (com.qlangtech.tis.datax.IDataxProcessor)2 TisUTF8 (com.qlangtech.tis.manage.common.TisUTF8)2 RowKind (org.apache.flink.types.RowKind)2 Test (org.junit.Test)2 LongColumn (com.alibaba.datax.common.element.LongColumn)1 StringColumn (com.alibaba.datax.common.element.StringColumn)1 RecordReceiver (com.alibaba.datax.common.plugin.RecordReceiver)1 Configuration (com.alibaba.datax.common.util.Configuration)1 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)1 HudiWriter (com.alibaba.datax.plugin.writer.hudi.HudiWriter)1 TisDataXHudiWriter (com.alibaba.datax.plugin.writer.hudi.TisDataXHudiWriter)1 Maps (com.google.common.collect.Maps)1 TestBasicFlinkSourceHandle (com.qlangtech.plugins.incr.flink.cdc.source.TestBasicFlinkSourceHandle)1 IMQListener (com.qlangtech.tis.async.message.client.consumer.IMQListener)1 MQListenerFactory (com.qlangtech.tis.async.message.client.consumer.impl.MQListenerFactory)1 IHiveConnGetter (com.qlangtech.tis.config.hive.IHiveConnGetter)1 HiveTable (com.qlangtech.tis.config.hive.meta.HiveTable)1