use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.
the class TestDataXHudiWriter method createDataXWriter.
private static HudiTest createDataXWriter(Optional<FileSystemFactory> fsFactory) {
    final DefaultSparkConnGetter sparkConnGetter = new DefaultSparkConnGetter();
    sparkConnGetter.name = "default";
    sparkConnGetter.master = "spark://sparkmaster:7077";
    // Anonymous subclass that stubs the Hive/Spark/filesystem lookups with test fixtures.
    DataXHudiWriter writer = new DataXHudiWriter() {

        @Override
        public Class<?> getOwnerClass() {
            return DataXHudiWriter.class;
        }

        @Override
        public IHiveConnGetter getHiveConnMeta() {
            return HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
        }

        @Override
        public ISparkConnGetter getSparkConnGetter() {
            return sparkConnGetter;
        }

        @Override
        public FileSystemFactory getFs() {
            // Fall back to the shared test HDFS factory when no factory is supplied.
            return fsFactory.isPresent() ? fsFactory.get() : HdfsFileSystemFactoryTestUtils.getFileSystemFactory();
        }
    };
    writer.template = DataXHudiWriter.getDftTemplate();
    writer.fsName = HdfsFileSystemFactoryTestUtils.FS_NAME;
    writer.setKey(new KeyedPluginStore.Key(null, HdfsFileSystemFactoryTestUtils.testDataXName.getName(), null));
    writer.tabType = HudiWriteTabType.COW.getValue();
    writer.batchOp = BatchOpMode.BULK_INSERT.getValue();
    writer.shuffleParallelism = 3;
    writer.partitionedBy = "pt";
    // writer.batchByteSize = 3456;
    // writer.batchSize = 9527;
    // writer.dbName = dbName;
    writer.writeMode = "insert";
    // writer.autoCreateTable = true;
    // writer.postSql = "drop table @table";
    // writer.preSql = "drop table @table";
    // writer.dataXName = HdfsFileSystemFactoryTestUtils.testDataXName.getName();
    // writer.dbName = dbName;
    // HudiSelectedTab hudiTab = new HudiSelectedTab() {
    //     @Override
    //     public List<ColMeta> getCols() {
    //         return WriterTemplate.createColMetas();
    //     }
    // };
    // //hudiTab.partitionPathField = WriterTemplate.kind;
    // hudiTab.recordField = WriterTemplate.customerregisterId;
    // hudiTab.sourceOrderingField = WriterTemplate.lastVer;
    // hudiTab.setWhere("1=1");
    // hudiTab.name = WriterTemplate.TAB_customer_order_relation;
    // Derive the column metadata from the bundled DataX writer assert-config JSON on the classpath.
    List<HdfsColMeta> colsMeta = HdfsColMeta.getColsMeta(
            Configuration.from(IOUtils.loadResourceFromClasspath(writer.getClass(), hudi_datax_writer_assert_without_optional)).getConfiguration(cfgPathParameter));
    HudiSelectedTab tab = new HudiSelectedTab() {

        @Override
        public List<ColMeta> getCols() {
            return colsMeta.stream().map((c) -> {
                ColMeta col = new ColMeta();
                col.setName(c.getName());
                col.setPk(c.pk);
                col.setType(c.type);
                col.setNullable(c.nullable);
                return col;
            }).collect(Collectors.toList());
        }
    };
    tab.name = WriterTemplate.TAB_customer_order_relation;
    tab.partition = new OffPartition();
    tab.sourceOrderingField = "last_ver";
    tab.recordField = "customerregister_id";
    return new HudiTest(writer, WriterTemplate.createCustomer_order_relationTableMap(Optional.of(tab)), tab);
}
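testRealDump below invokes a no-argument createDataXWriter(); that overload is not part of this listing. A minimal sketch of what it presumably does, assuming it simply falls back to the default test filesystem factory:

private static HudiTest createDataXWriter() {
    // Hypothetical convenience overload (not shown in this listing): delegate with an
    // empty Optional so that getFs() above resolves the shared test HDFS factory.
    return createDataXWriter(Optional.empty());
}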
use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.
the class TestDataXHudiWriter method testRealDump.
@Test
public void testRealDump() throws Exception {
    MDC.put(TISCollectionUtils.KEY_COLLECTION, HdfsFileSystemFactoryTestUtils.testDataXName.getName());
    MDC.put(IParamContext.KEY_TASK_ID, "123");
    HudiTest houseTest = createDataXWriter();
    long timestamp = 20220311135455L;
    // houseTest.writer.autoCreateTable = true;
    DataxProcessor dataXProcessor = EasyMock.mock("dataXProcessor", DataxProcessor.class);
    File dataXCfgDir = folder.newFolder();
    File createDDLDir = folder.newFolder();
    File createDDLFile = null;
    try {
        createDDLFile = new File(createDDLDir, HudiWriter.targetTableName + IDataxProcessor.DATAX_CREATE_DDL_FILE_NAME_SUFFIX);
        FileUtils.write(createDDLFile, com.qlangtech.tis.extension.impl.IOUtils.loadResourceFromClasspath(DataXHudiWriter.class, "create_ddl_customer_order_relation.sql"), TisUTF8.get());
        DataXCfgGenerator.GenerateCfgs genCfg = new DataXCfgGenerator.GenerateCfgs();
        genCfg.setGenTime(timestamp);
        genCfg.setGroupedChildTask(Collections.singletonMap(WriterTemplate.TAB_customer_order_relation, Lists.newArrayList(WriterTemplate.TAB_customer_order_relation + "_0")));
        genCfg.write2GenFile(dataXCfgDir);
        EasyMock.expect(dataXProcessor.getDataxCfgDir(null)).andReturn(dataXCfgDir);
        // EasyMock.expect(dataXProcessor.getDataxCreateDDLDir(null)).andReturn(createDDLDir);
        // Route the static plugin lookups to the test fixtures.
        DataxWriter.dataxWriterGetter = (dataXName) -> {
            return houseTest.writer;
        };
        DataxProcessor.processorGetter = (dataXName) -> {
            Assert.assertEquals(HdfsFileSystemFactoryTestUtils.testDataXName.getName(), dataXName);
            return dataXProcessor;
        };
        IExecChainContext execContext = EasyMock.mock("execContext", IExecChainContext.class);
        EasyMock.expect(execContext.getPartitionTimestamp()).andReturn(String.valueOf(timestamp));
        EasyMock.replay(dataXProcessor, execContext);
        // WriterTemplate.realExecuteDump(hudi_datax_writer_assert_without_optional, houseTest.writer, (cfg) -> {
        //     cfg.set(cfgPathParameter + "." + DataxUtils.EXEC_TIMESTAMP, timestamp);
        //     return cfg;
        // });
        // DataXHudiWriter hudiWriter = new DataXHudiWriter();
        // hudiWriter.dataXName = HdfsFileSystemFactoryTestUtils.testDataXName.getName();
        // hudiWriter.createPostTask(execContext, tab);
        HudiDumpPostTask postTask = (HudiDumpPostTask) houseTest.writer.createPostTask(execContext, houseTest.tab);
        Assert.assertNotNull("postTask can not be null", postTask);
        postTask.run();
        // After the post task has run, the table must be visible through the Hive metastore.
        IHiveConnGetter hiveConnMeta = houseTest.writer.getHiveConnMeta();
        try (IHiveMetaStore metaStoreClient = hiveConnMeta.createMetaStoreClient()) {
            Assert.assertNotNull(metaStoreClient);
            HiveTable table = metaStoreClient.getTable(hiveConnMeta.getDbName(), WriterTemplate.TAB_customer_order_relation);
            Assert.assertNotNull(WriterTemplate.TAB_customer_order_relation + " can not be null", table);
        }
        EasyMock.verify(dataXProcessor, execContext);
    } finally {
        // FileUtils.deleteQuietly(createDDLFile);
    }
}
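The metastore-based assertion at the end of testRealDump could equally be made over JDBC. A hedged sketch, assuming the hiveserver:10000 address and hive/hive credentials from createHiveConnGetter below, plus a Hive JDBC driver on the test classpath:

// Hypothetical JDBC variant of the table-existence check above.
try (Connection conn = DriverManager.getConnection("jdbc:hive2://hiveserver:10000/default", "hive", "hive");
     Statement stmt = conn.createStatement();
     ResultSet rs = stmt.executeQuery("SHOW TABLES LIKE '" + WriterTemplate.TAB_customer_order_relation + "'")) {
    Assert.assertTrue("table should have been synced to hive", rs.next());
}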
use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.
the class HdfsFileSystemFactoryTestUtils method createHiveConnGetter.
public static IHiveConnGetter createHiveConnGetter() {
    Descriptor hiveConnGetter = TIS.get().getDescriptor("DefaultHiveConnGetter");
    Assert.assertNotNull(hiveConnGetter);
    // Uses the Hudi docker demo environment: https://hudi.apache.org/docs/docker_demo#step-3-sync-with-hive
    Descriptor.FormData formData = new Descriptor.FormData();
    formData.addProp("name", "testhiveConn");
    formData.addProp("hiveAddress", "hiveserver:10000");
    formData.addProp("useUserToken", "true");
    formData.addProp("dbName", "default");
    formData.addProp("password", "hive");
    formData.addProp("userName", "hive");
    formData.addProp("metaStoreUrls", "thrift://hiveserver:9083");
    Descriptor.ParseDescribable<IHiveConnGetter> parseDescribable = hiveConnGetter.newInstance(HdfsFileSystemFactoryTestUtils.testDataXName.getName(), formData);
    Assert.assertNotNull(parseDescribable.instance);
    return parseDescribable.instance;
}
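A typical use of the returned connection getter, mirroring the assertions in testRealDump above; IHiveMetaStore is assumed to be AutoCloseable, as the try-with-resources there implies, and the table name here is purely illustrative:

IHiveConnGetter connGetter = HdfsFileSystemFactoryTestUtils.createHiveConnGetter();
try (IHiveMetaStore metaStore = connGetter.createMetaStoreClient()) {
    // "default" is the dbName configured in the FormData above; the table name is a placeholder.
    HiveTable table = metaStore.getTable(connGetter.getDbName(), "customer_order_relation");
    Assert.assertNotNull(table);
}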
use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.
the class HudiDumpPostTask method writeSourceProps.
private void writeSourceProps(ITISFileSystem fs, IPath dumpDir, IPath fsSourcePropsPath) {
    IPath fsSourceSchemaPath = HudiTableMeta.createFsSourceSchema(fs, this.hudiTab.getName(), dumpDir, this.hudiTab);
    IPath tabDumpParentPath = TisDataXHudiWriter.createTabDumpParentPath(fs, dumpDir);
    try (OutputStream write = fs.create(fsSourcePropsPath, true)) {
        // TypedProperties props = new TypedProperties();
        TypedPropertiesBuilder props = new TypedPropertiesBuilder();
        String shuffleParallelism = String.valueOf(this.hudiWriter.shuffleParallelism);
        props.setProperty("hoodie.upsert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.insert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.delete.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.bulkinsert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.embed.timeline.server", "true");
        props.setProperty("hoodie.filesystem.view.type", "EMBEDDED_KV_STORE");
        // @see HoodieCompactionConfig.INLINE_COMPACT
        // props.setProperty("hoodie.compact.inline", (hudiTabType == HudiWriteTabType.MOR) ? "true" : "false");
        // BasicFSWriter writerPlugin = this.getWriterPlugin();
        // CSV source layout, see https://spark.apache.org/docs/3.2.1/sql-data-sources-csv.html
        props.setProperty("hoodie.deltastreamer.source.dfs.root", String.valueOf(tabDumpParentPath));
        props.setProperty("hoodie.deltastreamer.csv.header", Boolean.toString(TisDataXHudiWriter.CSV_FILE_USE_HEADER));
        props.setProperty("hoodie.deltastreamer.csv.sep", String.valueOf(TisDataXHudiWriter.CSV_Column_Separator));
        props.setProperty("hoodie.deltastreamer.csv.nullValue", TisDataXHudiWriter.CSV_NULL_VALUE);
        props.setProperty("hoodie.deltastreamer.csv.escape", String.valueOf(TisDataXHudiWriter.CSV_ESCAPE_CHAR));
        // props.setProperty("hoodie.deltastreamer.csv.escapeQuotes", "false");
        // The same Avro schema file serves as both source and target schema.
        props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", String.valueOf(fsSourceSchemaPath));
        props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", String.valueOf(fsSourceSchemaPath));
        // please reference: DataSourceWriteOptions, HiveSyncConfig
        final IHiveConnGetter hiveMeta = this.hudiWriter.getHiveConnMeta();
        props.setProperty("hoodie.datasource.hive_sync.database", hiveMeta.getDbName());
        props.setProperty("hoodie.datasource.hive_sync.table", this.hudiTab.getName());
        if (this.hudiTab.partition == null) {
            throw new IllegalStateException("hudiPlugin.partitionedBy can not be empty");
        }
        // Delegate the partition-related hive_sync properties to the configured partition strategy.
        this.hudiTab.partition.setProps(props, this.hudiWriter);
        // props.setProperty("hoodie.datasource.hive_sync.partition_fields", hudiPlugin.partitionedBy);
        // // "org.apache.hudi.hive.MultiPartKeysValueExtractor";
        // // partition value extractor class
        // props.setProperty("hoodie.datasource.hive_sync.partition_extractor_class"
        //     , "org.apache.hudi.hive.MultiPartKeysValueExtractor");
        Optional<HiveUserToken> hiveUserToken = hiveMeta.getUserToken();
        if (hiveUserToken.isPresent()) {
            HiveUserToken token = hiveUserToken.get();
            props.setProperty("hoodie.datasource.hive_sync.username", token.userName);
            props.setProperty("hoodie.datasource.hive_sync.password", token.password);
        }
        props.setProperty("hoodie.datasource.hive_sync.jdbcurl", hiveMeta.getJdbcUrl());
        props.setProperty("hoodie.datasource.hive_sync.mode", "jdbc");
        props.setProperty("hoodie.datasource.write.recordkey.field", this.hudiTab.recordField);
        // props.setProperty("hoodie.datasource.write.partitionpath.field", hudiWriter.partitionedBy);
        props.store(write);
    } catch (IOException e) {
        throw new RuntimeException("failed to write " + tabDumpParentPath + " CSV file metaData", e);
    }
}
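TypedPropertiesBuilder is only exercised through setProperty and store here; a minimal sketch of the contract this method relies on, assuming it simply wraps java.util.Properties (the real class may differ):

// Assumed shape of TypedPropertiesBuilder, inferred solely from its usage above.
public class TypedPropertiesBuilder {

    private final java.util.Properties delegate = new java.util.Properties();

    public void setProperty(String key, String value) {
        delegate.setProperty(key, value);
    }

    public void store(java.io.OutputStream out) throws java.io.IOException {
        // Serialize as plain key=value lines for the deltastreamer to consume.
        delegate.store(out, null);
    }
}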
use of com.qlangtech.tis.config.hive.IHiveConnGetter in project plugins by qlangtech.
the class HudiTableMeta method getHistoryBatchs.
public static List<Option> getHistoryBatchs(ITISFileSystem fs, IHiveConnGetter hiveConn) {
    // Each child directory under <rootDir>/<dbName> represents one historical batch.
    IPath path = fs.getPath(fs.getRootDir(), hiveConn.getDbName());
    List<IPathInfo> child = fs.listChildren(path);
    return child.stream().map((c) -> new Option(c.getName())).collect(Collectors.toList());
}
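A hedged usage sketch; FileSystemFactory.getFileSystem() and Option.getName() are assumptions not shown in this listing:

// List the historical batch directories under the Hive database root.
ITISFileSystem fileSystem = HdfsFileSystemFactoryTestUtils.getFileSystemFactory().getFileSystem();
List<Option> batches = HudiTableMeta.getHistoryBatchs(fileSystem, HdfsFileSystemFactoryTestUtils.createHiveConnGetter());
for (Option batch : batches) {
    System.out.println(batch.getName());
}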