Example 1 with IHiveConnGetter

Use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.

The class TestDataXHudiWriter, method createDataXWriter:

private static HudiTest createDataXWriter(Optional<FileSystemFactory> fsFactory) {
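    // Spark connection stub pointing at the test cluster (the Hudi Docker demo environment referenced in Example 3).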
    final DefaultSparkConnGetter sparkConnGetter = new DefaultSparkConnGetter();
    sparkConnGetter.name = "default";
    sparkConnGetter.master = "spark://sparkmaster:7077";
    DataXHudiWriter writer = new DataXHudiWriter() {

        @Override
        public Class<?> getOwnerClass() {
            return DataXHudiWriter.class;
        }

        @Override
        public IHiveConnGetter getHiveConnMeta() {
            return HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
        }

        @Override
        public ISparkConnGetter getSparkConnGetter() {
            return sparkConnGetter;
        }

        @Override
        public FileSystemFactory getFs() {
            return fsFactory.orElseGet(HdfsFileSystemFactoryTestUtils::getFileSystemFactory);
        }
    };
    writer.template = DataXHudiWriter.getDftTemplate();
    writer.fsName = HdfsFileSystemFactoryTestUtils.FS_NAME;
    writer.setKey(new KeyedPluginStore.Key(null, HdfsFileSystemFactoryTestUtils.testDataXName.getName(), null));
    writer.tabType = HudiWriteTabType.COW.getValue();
    writer.batchOp = BatchOpMode.BULK_INSERT.getValue();
    writer.shuffleParallelism = 3;
    writer.partitionedBy = "pt";
    // writer.batchByteSize = 3456;
    // writer.batchSize = 9527;
    // writer.dbName = dbName;
    writer.writeMode = "insert";
    // writer.autoCreateTable = true;
    // writer.postSql = "drop table @table";
    // writer.preSql = "drop table @table";
    // writer.dataXName = HdfsFileSystemFactoryTestUtils.testDataXName.getName();
    // writer.dbName = dbName;
    // HudiSelectedTab hudiTab = new HudiSelectedTab() {
    // @Override
    // public List<ColMeta> getCols() {
    // return WriterTemplate.createColMetas();
    // }
    // };
    // //hudiTab.partitionPathField = WriterTemplate.kind;
    // hudiTab.recordField = WriterTemplate.customerregisterId;
    // hudiTab.sourceOrderingField = WriterTemplate.lastVer;
    // hudiTab.setWhere("1=1");
    // hudiTab.name = WriterTemplate.TAB_customer_order_relation;
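    // Column metadata is derived from the assert JSON resource bundled on the classpath.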
    List<HdfsColMeta> colsMeta = HdfsColMeta.getColsMeta(
            Configuration.from(IOUtils.loadResourceFromClasspath(writer.getClass(), hudi_datax_writer_assert_without_optional))
                    .getConfiguration(cfgPathParameter));
    HudiSelectedTab tab = new HudiSelectedTab() {

        @Override
        public List<ColMeta> getCols() {
            return colsMeta.stream().map((c) -> {
                ColMeta col = new ColMeta();
                col.setName(c.getName());
                col.setPk(c.pk);
                col.setType(c.type);
                col.setNullable(c.nullable);
                return col;
            }).collect(Collectors.toList());
        }
    };
    tab.name = WriterTemplate.TAB_customer_order_relation;
    tab.partition = new OffPartition();
    tab.sourceOrderingField = "last_ver";
    tab.recordField = "customerregister_id";
    return new HudiTest(writer, WriterTemplate.createCustomer_order_relationTableMap(Optional.of(tab)), tab);
}
Also used: DefaultSparkConnGetter(com.qlangtech.tis.config.spark.impl.DefaultSparkConnGetter) IExecChainContext(com.qlangtech.tis.exec.IExecChainContext) TISCollectionUtils(com.qlangtech.tis.manage.common.TISCollectionUtils) BeforeClass(org.junit.BeforeClass) HudiWriter(com.alibaba.datax.plugin.writer.hudi.HudiWriter) TargetResName(com.qlangtech.tis.coredefine.module.action.TargetResName) Configuration(com.alibaba.datax.common.util.Configuration) ISparkConnGetter(com.qlangtech.tis.config.spark.ISparkConnGetter) Lists(com.google.common.collect.Lists) CenterResource(com.qlangtech.tis.manage.common.CenterResource) DataxProcessor(com.qlangtech.tis.datax.impl.DataxProcessor) HdfsPath(com.qlangtech.tis.hdfs.impl.HdfsPath) IHiveMetaStore(com.qlangtech.tis.config.hive.meta.IHiveMetaStore) TisUTF8(com.qlangtech.tis.manage.common.TisUTF8) DataxUtils(com.qlangtech.tis.offline.DataxUtils) IDataxProcessor(com.qlangtech.tis.datax.IDataxProcessor) KeyedPluginStore(com.qlangtech.tis.plugin.KeyedPluginStore) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem) OffPartition(com.qlangtech.tis.plugin.datax.hudi.partition.OffPartition) DataXCfgGenerator(com.qlangtech.tis.datax.impl.DataXCfgGenerator) HdfsFileSystemFactoryTestUtils(com.qlangtech.tis.hdfs.test.HdfsFileSystemFactoryTestUtils) IStreamIncrGenerateStrategy(com.qlangtech.tis.sql.parser.tuple.creator.IStreamIncrGenerateStrategy) IPath(com.qlangtech.tis.fs.IPath) EntityName(com.qlangtech.tis.sql.parser.tuple.creator.EntityName) DataxWriter(com.qlangtech.tis.datax.impl.DataxWriter) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) EasyMock(org.easymock.EasyMock) Collectors(java.util.stream.Collectors) FileSystemFactory(com.qlangtech.tis.offline.FileSystemFactory) File(java.io.File) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) IParamContext(com.qlangtech.tis.order.center.IParamContext) List(java.util.List) IOUtils(com.qlangtech.tis.extension.impl.IOUtils) Rule(org.junit.Rule) MDC(org.slf4j.MDC) HiveTable(com.qlangtech.tis.config.hive.meta.HiveTable) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter) Optional(java.util.Optional) Assert(org.junit.Assert) WriterTemplate(com.qlangtech.tis.plugin.common.WriterTemplate) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
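
For orientation, a minimal sketch of how this factory might be exercised from a test; it relies only on members visible above (the HudiTest fields writer and tab, and the writer's public fields):

HudiTest fixture = createDataXWriter(Optional.empty());
// With an empty Optional the writer falls back to the shared HDFS test FileSystemFactory.
Assert.assertNotNull(fixture.writer.getFs());
Assert.assertEquals(HudiWriteTabType.COW.getValue(), fixture.writer.tabType);
Assert.assertEquals("pt", fixture.writer.partitionedBy);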

Example 2 with IHiveConnGetter

Use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.

The class TestDataXHudiWriter, method testRealDump:

@Test
public void testRealDump() throws Exception {
    MDC.put(TISCollectionUtils.KEY_COLLECTION, HdfsFileSystemFactoryTestUtils.testDataXName.getName());
    MDC.put(IParamContext.KEY_TASK_ID, "123");
    HudiTest houseTest = createDataXWriter(Optional.empty());
    long timestamp = 20220311135455L;
    // houseTest.writer.autoCreateTable = true;
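    // Mock the DataxProcessor so the writer resolves its generated DataX configs from a temporary folder.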
    DataxProcessor dataXProcessor = EasyMock.mock("dataXProcessor", DataxProcessor.class);
    File dataXCfgDir = folder.newFolder();
    File createDDLDir = folder.newFolder();
    File createDDLFile = null;
    try {
        createDDLFile = new File(createDDLDir, HudiWriter.targetTableName + IDataxProcessor.DATAX_CREATE_DDL_FILE_NAME_SUFFIX);
        FileUtils.write(createDDLFile, com.qlangtech.tis.extension.impl.IOUtils.loadResourceFromClasspath(DataXHudiWriter.class, "create_ddl_customer_order_relation.sql"), TisUTF8.get());
        DataXCfgGenerator.GenerateCfgs genCfg = new DataXCfgGenerator.GenerateCfgs();
        genCfg.setGenTime(timestamp);
        genCfg.setGroupedChildTask(Collections.singletonMap(WriterTemplate.TAB_customer_order_relation, Lists.newArrayList(WriterTemplate.TAB_customer_order_relation + "_0")));
        genCfg.write2GenFile(dataXCfgDir);
        EasyMock.expect(dataXProcessor.getDataxCfgDir(null)).andReturn(dataXCfgDir);
        // EasyMock.expect(dataXProcessor.getDataxCreateDDLDir(null)).andReturn(createDDLDir);
        DataxWriter.dataxWriterGetter = (dataXName) -> houseTest.writer;
        DataxProcessor.processorGetter = (dataXName) -> {
            Assert.assertEquals(HdfsFileSystemFactoryTestUtils.testDataXName.getName(), dataXName);
            return dataXProcessor;
        };
        IExecChainContext execContext = EasyMock.mock("execContext", IExecChainContext.class);
        EasyMock.expect(execContext.getPartitionTimestamp()).andReturn(String.valueOf(timestamp));
        EasyMock.replay(dataXProcessor, execContext);
        // WriterTemplate.realExecuteDump(hudi_datax_writer_assert_without_optional, houseTest.writer, (cfg) -> {
        // cfg.set(cfgPathParameter + "." + DataxUtils.EXEC_TIMESTAMP, timestamp);
        // return cfg;
        // });
        // DataXHudiWriter hudiWriter = new DataXHudiWriter();
        // hudiWriter.dataXName = HdfsFileSystemFactoryTestUtils.testDataXName.getName();
        // hudiWriter.createPostTask(execContext, tab);
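        // Run the post task: it drives the Hudi ingest of the dumped data and syncs the result to Hive.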
        HudiDumpPostTask postTask = (HudiDumpPostTask) houseTest.writer.createPostTask(execContext, houseTest.tab);
        Assert.assertNotNull("postTask can not be null", postTask);
        postTask.run();
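        // Verify the sync: the table must now be visible through the Hive metastore.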
        IHiveConnGetter hiveConnMeta = houseTest.writer.getHiveConnMeta();
        try (IHiveMetaStore metaStoreClient = hiveConnMeta.createMetaStoreClient()) {
            Assert.assertNotNull(metaStoreClient);
            HiveTable table = metaStoreClient.getTable(hiveConnMeta.getDbName(), WriterTemplate.TAB_customer_order_relation);
            Assert.assertNotNull(WriterTemplate.TAB_customer_order_relation + " cannot be null", table);
        }
        EasyMock.verify(dataXProcessor, execContext);
    } finally {
    // FileUtils.deleteQuietly(createDDLFile);
    }
}
Also used: IHiveMetaStore(com.qlangtech.tis.config.hive.meta.IHiveMetaStore) DataXCfgGenerator(com.qlangtech.tis.datax.impl.DataXCfgGenerator) IExecChainContext(com.qlangtech.tis.exec.IExecChainContext) DataxProcessor(com.qlangtech.tis.datax.impl.DataxProcessor) IDataxProcessor(com.qlangtech.tis.datax.IDataxProcessor) File(java.io.File) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter) HiveTable(com.qlangtech.tis.config.hive.meta.HiveTable) Test(org.junit.Test)

Example 3 with IHiveConnGetter

Use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.

The class HdfsFileSystemFactoryTestUtils, method createHiveConnGetter:

public static IHiveConnGetter createHiveConnGetter() {
    Descriptor hiveConnGetter = TIS.get().getDescriptor("DefaultHiveConnGetter");
    Assert.assertNotNull(hiveConnGetter);
    // Use the Hudi Docker runtime environment: https://hudi.apache.org/docs/docker_demo#step-3-sync-with-hive
    Descriptor.FormData formData = new Descriptor.FormData();
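    // Fill the descriptor form as a user would through the TIS UI.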
    formData.addProp("name", "testhiveConn");
    formData.addProp("hiveAddress", "hiveserver:10000");
    formData.addProp("useUserToken", "true");
    formData.addProp("dbName", "default");
    formData.addProp("password", "hive");
    formData.addProp("userName", "hive");
    formData.addProp("metaStoreUrls", "thrift://hiveserver:9083");
    Descriptor.ParseDescribable<IHiveConnGetter> parseDescribable = hiveConnGetter.newInstance(HdfsFileSystemFactoryTestUtils.testDataXName.getName(), formData);
    Assert.assertNotNull(parseDescribable.instance);
    return parseDescribable.instance;
}
Also used: Descriptor(com.qlangtech.tis.extension.Descriptor) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter)
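
A short consumption sketch, mirroring the metastore check from Example 2; it assumes it runs inside a test method declared throws Exception, and the table name is illustrative:

IHiveConnGetter connGetter = HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
Assert.assertEquals("default", connGetter.getDbName());
try (IHiveMetaStore metaStore = connGetter.createMetaStoreClient()) {
    Assert.assertNotNull(metaStore);
    // "customer_order_relation" stands in for any table already registered in the db.
    HiveTable table = metaStore.getTable(connGetter.getDbName(), "customer_order_relation");
    Assert.assertNotNull(table);
}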

Example 4 with IHiveConnGetter

Use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.

The class HudiDumpPostTask, method writeSourceProps:

private void writeSourceProps(ITISFileSystem fs, IPath dumpDir, IPath fsSourcePropsPath) {
    IPath fsSourceSchemaPath = HudiTableMeta.createFsSourceSchema(fs, this.hudiTab.getName(), dumpDir, this.hudiTab);
    IPath tabDumpParentPath = TisDataXHudiWriter.createTabDumpParentPath(fs, dumpDir);
    try (OutputStream write = fs.create(fsSourcePropsPath, true)) {
        // TypedProperties props = new TypedProperties();
        TypedPropertiesBuilder props = new TypedPropertiesBuilder();
        String shuffleParallelism = String.valueOf(this.hudiWriter.shuffleParallelism);
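        // Apply the configured parallelism uniformly to every Hudi shuffle stage.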
        props.setProperty("hoodie.upsert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.insert.shuffle.parallelism", (shuffleParallelism));
        props.setProperty("hoodie.delete.shuffle.parallelism", (shuffleParallelism));
        props.setProperty("hoodie.bulkinsert.shuffle.parallelism", (shuffleParallelism));
        props.setProperty("hoodie.embed.timeline.server", "true");
        props.setProperty("hoodie.filesystem.view.type", "EMBEDDED_KV_STORE");
        // @see HoodieCompactionConfig.INLINE_COMPACT
        // props.setProperty("hoodie.compact.inline", (hudiTabType == HudiWriteTabType.MOR) ? "true" : "false");
        // BasicFSWriter writerPlugin = this.getWriterPlugin();
        // https://spark.apache.org/docs/3.2.1/sql-data-sources-csv.html
        props.setProperty("hoodie.deltastreamer.source.dfs.root", String.valueOf(tabDumpParentPath));
        props.setProperty("hoodie.deltastreamer.csv.header", Boolean.toString(TisDataXHudiWriter.CSV_FILE_USE_HEADER));
        props.setProperty("hoodie.deltastreamer.csv.sep", String.valueOf(TisDataXHudiWriter.CSV_Column_Separator));
        props.setProperty("hoodie.deltastreamer.csv.nullValue", TisDataXHudiWriter.CSV_NULL_VALUE);
        props.setProperty("hoodie.deltastreamer.csv.escape", String.valueOf(TisDataXHudiWriter.CSV_ESCAPE_CHAR));
        // props.setProperty("hoodie.deltastreamer.csv.escapeQuotes", "false");
        props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", String.valueOf(fsSourceSchemaPath));
        props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", String.valueOf(fsSourceSchemaPath));
        // please reference: DataSourceWriteOptions , HiveSyncConfig
        final IHiveConnGetter hiveMeta = this.hudiWriter.getHiveConnMeta();
        props.setProperty("hoodie.datasource.hive_sync.database", hiveMeta.getDbName());
        props.setProperty("hoodie.datasource.hive_sync.table", this.hudiTab.getName());
        if (this.hudiTab.partition == null) {
            throw new IllegalStateException("hudiTab.partition cannot be null");
        }
        this.hudiTab.partition.setProps(props, this.hudiWriter);
        // props.setProperty("hoodie.datasource.hive_sync.partition_fields", hudiPlugin.partitionedBy);
        // // "org.apache.hudi.hive.MultiPartKeysValueExtractor";
        // // partition value extractor class
        // props.setProperty("hoodie.datasource.hive_sync.partition_extractor_class"
        // , "org.apache.hudi.hive.MultiPartKeysValueExtractor");
        Optional<HiveUserToken> hiveUserToken = hiveMeta.getUserToken();
        if (hiveUserToken.isPresent()) {
            HiveUserToken token = hiveUserToken.get();
            props.setProperty("hoodie.datasource.hive_sync.username", token.userName);
            props.setProperty("hoodie.datasource.hive_sync.password", token.password);
        }
        props.setProperty("hoodie.datasource.hive_sync.jdbcurl", hiveMeta.getJdbcUrl());
        props.setProperty("hoodie.datasource.hive_sync.mode", "jdbc");
        props.setProperty("hoodie.datasource.write.recordkey.field", this.hudiTab.recordField);
        // props.setProperty("hoodie.datasource.write.partitionpath.field", hudiWriter.partitionedBy);
        props.store(write);
    } catch (IOException e) {
        throw new RuntimeException("faild to write " + tabDumpParentPath + " CSV file metaData", e);
    }
}
Also used: HiveUserToken(com.qlangtech.tis.config.hive.HiveUserToken) IPath(com.qlangtech.tis.fs.IPath) OutputStream(java.io.OutputStream) TypedPropertiesBuilder(com.alibaba.datax.plugin.writer.hudi.TypedPropertiesBuilder) IOException(java.io.IOException) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter)
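
The commented-out hive_sync lines above hint at what a partition strategy contributes through setProps at the call site hudiTab.partition.setProps(props, hudiWriter). A hypothetical sketch of a field-based variant follows; the class name is invented, the signature is inferred from the call site, and the property keys come from the comments above, so this is not the plugin's actual OffPartition implementation:

// Hypothetical partition strategy: syncs one set of partition fields to Hive.
public class FieldPartitionSketch {
    public void setProps(TypedPropertiesBuilder props, DataXHudiWriter hudiWriter) {
        // Sync the configured partition column(s) to the Hive table definition.
        props.setProperty("hoodie.datasource.hive_sync.partition_fields", hudiWriter.partitionedBy);
        // Extract partition values from the storage path segments.
        props.setProperty("hoodie.datasource.hive_sync.partition_extractor_class",
                "org.apache.hudi.hive.MultiPartKeysValueExtractor");
    }
}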

Example 5 with IHiveConnGetter

Use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.

The class HudiTableMeta, method getHistoryBatchs:

public static List<Option> getHistoryBatchs(ITISFileSystem fs, IHiveConnGetter hiveConn) {
    IPath path = fs.getPath(fs.getRootDir(), hiveConn.getDbName());
    List<IPathInfo> child = fs.listChildren(path);
    return child.stream().map((c) -> new Option(c.getName())).collect(Collectors.toList());
}
Also used: IPathInfo(com.qlangtech.tis.fs.IPathInfo) OutputStream(java.io.OutputStream) IPath(com.qlangtech.tis.fs.IPath) Schema(org.apache.avro.Schema) ISelectedTab(com.qlangtech.tis.plugin.ds.ISelectedTab) Option(com.qlangtech.tis.manage.common.Option) BasicHdfsWriterJob(com.qlangtech.tis.plugin.datax.BasicHdfsWriterJob) Collectors(java.util.stream.Collectors) Key(com.alibaba.datax.plugin.writer.hdfswriter.Key) Configuration(com.alibaba.datax.common.util.Configuration) HdfsWriterErrorCode(com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriterErrorCode) SchemaBuilder(org.apache.avro.SchemaBuilder) HdfsColMeta(com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) LogicalTypes(org.apache.avro.LogicalTypes) CollectionUtils(org.apache.commons.collections.CollectionUtils) DataType(com.qlangtech.tis.plugin.ds.DataType) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter) TisUTF8(com.qlangtech.tis.manage.common.TisUTF8) DataxUtils(com.qlangtech.tis.offline.DataxUtils) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem)
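
A minimal usage sketch; note that the getFileSystem() accessor is an assumption here, since this section does not show how an ITISFileSystem handle is obtained from a FileSystemFactory:

// getFileSystem() is assumed; the accessor name is not shown in this section.
ITISFileSystem fs = HdfsFileSystemFactoryTestUtils.getFileSystemFactory().getFileSystem();
IHiveConnGetter hiveConn = HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
// One Option per dump-batch directory under <rootDir>/<dbName>.
List<Option> batches = HudiTableMeta.getHistoryBatchs(fs, hiveConn);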

Aggregations

IHiveConnGetter (com.qlangtech.tis.config.hive.IHiveConnGetter): 5
IPath (com.qlangtech.tis.fs.IPath): 3
Configuration (com.alibaba.datax.common.util.Configuration): 2
HdfsColMeta (com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta): 2
HiveTable (com.qlangtech.tis.config.hive.meta.HiveTable): 2
IHiveMetaStore (com.qlangtech.tis.config.hive.meta.IHiveMetaStore): 2
IDataxProcessor (com.qlangtech.tis.datax.IDataxProcessor): 2
DataXCfgGenerator (com.qlangtech.tis.datax.impl.DataXCfgGenerator): 2
DataxProcessor (com.qlangtech.tis.datax.impl.DataxProcessor): 2
IExecChainContext (com.qlangtech.tis.exec.IExecChainContext): 2
ITISFileSystem (com.qlangtech.tis.fs.ITISFileSystem): 2
TisUTF8 (com.qlangtech.tis.manage.common.TisUTF8): 2
DataxUtils (com.qlangtech.tis.offline.DataxUtils): 2
File (java.io.File): 2
OutputStream (java.io.OutputStream): 2
List (java.util.List): 2
Test (org.junit.Test): 2
HdfsWriterErrorCode (com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriterErrorCode): 1
Key (com.alibaba.datax.plugin.writer.hdfswriter.Key): 1
HudiWriter (com.alibaba.datax.plugin.writer.hudi.HudiWriter): 1