Search in sources :

Example 1 with IPath

use of com.qlangtech.tis.fs.IPath in project plugins by qlangtech.

the class TestDataXHudiWriter method testFlinkSqlTableDDLCreate.

/**
 * Prepares EasyMock doubles for {@link FileSystemFactory}, {@link ITISFileSystem} and
 * {@link DataxProcessor}, builds the Hudi writer fixture through {@code createDataXWriter},
 * then replays and verifies the mocks.
 * <p>
 * The Flink table DDL generation and its assertions are currently commented out, so the only
 * active checks are the dataX name assertion inside {@code processorGetter} and
 * {@code EasyMock.verify}.
 * <p>
 * NOTE(review): no visible statement exercises the mocks between {@code replay} and
 * {@code verify}, and {@code createDataXWriter} runs while the mocks are still in record
 * state -- confirm this test passes as written.
 */
@Test
public void testFlinkSqlTableDDLCreate() throws Exception {
    FileSystemFactory fsFactory = EasyMock.createMock("fsFactory", FileSystemFactory.class);
    ITISFileSystem fs = EasyMock.createMock("fileSystem", ITISFileSystem.class);
    // fs.getRootDir()
    // Path layout used by the expectations below: <root>/default/customer_order_relation/hudi
    String child = "default/customer_order_relation";
    String dataDir = "hudi";
    IPath rootPath = new HdfsPath(HdfsFileSystemFactoryTestUtils.DEFAULT_HDFS_ADDRESS + "/user/admin");
    IPath tabPath = new HdfsPath(rootPath, child);
    IPath hudiDataPath = new HdfsPath(tabPath, dataDir);
    // Record the file-system calls expected from the code under test.
    EasyMock.expect(fs.getPath(rootPath, child)).andReturn(tabPath);
    EasyMock.expect(fs.getPath(tabPath, dataDir)).andReturn(hudiDataPath);
    EasyMock.expect(fs.getRootDir()).andReturn(rootPath);
    EasyMock.expect(fsFactory.getFileSystem()).andReturn(fs);
    // The writer fixture is created before replay(); keep this ordering when editing.
    HudiTest forTest = createDataXWriter(Optional.of(fsFactory));
    DataxProcessor dataXProcessor = EasyMock.mock("dataXProcessor", DataxProcessor.class);
    // Write a DataX job config to a temp file, wrapping the writer JSON loaded from the classpath.
    File dataXCfg = folder.newFile();
    FileUtils.writeStringToFile(dataXCfg, "{job:{content:[{\"writer\":" + IOUtils.loadResourceFromClasspath(this.getClass(), hudi_datax_writer_assert_without_optional) + "}]}}", TisUTF8.get());
    List<File> dataXFiles = Lists.newArrayList(dataXCfg);
    EasyMock.expect(dataXProcessor.getDataxCfgFileNames(null)).andReturn(dataXFiles);
    // Replace the static processor lookup so it returns the mock and checks the requested name.
    // NOTE(review): this static field is not restored after the test -- confirm no other test depends on it.
    DataxProcessor.processorGetter = (dataXName) -> {
        Assert.assertEquals(HdfsFileSystemFactoryTestUtils.testDataXName.getName(), dataXName);
        return dataXProcessor;
    };
    EasyMock.replay(dataXProcessor, fsFactory, fs);
    // IStreamTableCreator.IStreamTableMeta
    // streamTableMeta = forTest.writer.getStreamTableMeta(HudiWriter.targetTableName);
    // Assert.assertNotNull("streamTableMeta can not be null", streamTableMeta);
    // streamTableMeta.getColsMeta();
    // System.out.println(streamTableMeta.createFlinkTableDDL());
    // DataXHudiWriter.HudiStreamTemplateData tplData
    // = (DataXHudiWriter.HudiStreamTemplateData) forTest.writer.decorateMergeData(
    // new TestStreamTemplateData(HdfsFileSystemFactoryTestUtils.testDataXName, HudiWriter.targetTableName));
    // 
    // 
    // StringBuffer createTabDdl = tplData.getSinkFlinkTableDDL(HudiWriter.targetTableName);
    // Assert.assertNotNull(createTabDdl);
    // 
    // System.out.println(createTabDdl);
    EasyMock.verify(dataXProcessor, fsFactory, fs);
}
Also used : DataxProcessor(com.qlangtech.tis.datax.impl.DataxProcessor) IDataxProcessor(com.qlangtech.tis.datax.IDataxProcessor) HdfsPath(com.qlangtech.tis.hdfs.impl.HdfsPath) IPath(com.qlangtech.tis.fs.IPath) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem) File(java.io.File) FileSystemFactory(com.qlangtech.tis.offline.FileSystemFactory) Test(org.junit.Test)

Example 2 with IPath

use of com.qlangtech.tis.fs.IPath in project plugins by qlangtech.

the class HudiDumpPostTask method writeSourceProps.

/**
 * Writes the Hudi DeltaStreamer source properties file to {@code fsSourcePropsPath}.
 * <p>
 * The file carries shuffle parallelism, the CSV source location and format options, the
 * source/target schema file location, Hive sync settings and the record key field. Partition
 * related properties are contributed by {@code hudiTab.partition}.
 *
 * @param fs                file system used to create the schema file and the properties file
 * @param dumpDir           dump directory from which the CSV parent path and schema path are derived
 * @param fsSourcePropsPath destination of the properties file; an existing file is overwritten
 * @throws IllegalStateException if the table has no partition configuration
 * @throws RuntimeException      wrapping the {@link IOException} if the properties file cannot be written
 */
private void writeSourceProps(ITISFileSystem fs, IPath dumpDir, IPath fsSourcePropsPath) {
    // Fail fast: validate the configuration before any file is created or overwritten, so a
    // misconfigured table does not leave a truncated properties file behind.
    if (this.hudiTab.partition == null) {
        throw new IllegalStateException("hudiPlugin.partitionedBy can not be empty");
    }
    IPath fsSourceSchemaPath = HudiTableMeta.createFsSourceSchema(fs, this.hudiTab.getName(), dumpDir, this.hudiTab);
    IPath tabDumpParentPath = TisDataXHudiWriter.createTabDumpParentPath(fs, dumpDir);
    try (OutputStream write = fs.create(fsSourcePropsPath, true)) {
        TypedPropertiesBuilder props = new TypedPropertiesBuilder();

        // The same parallelism is applied to every write operation type.
        String shuffleParallelism = String.valueOf(this.hudiWriter.shuffleParallelism);
        props.setProperty("hoodie.upsert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.insert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.delete.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.bulkinsert.shuffle.parallelism", shuffleParallelism);
        props.setProperty("hoodie.embed.timeline.server", "true");
        props.setProperty("hoodie.filesystem.view.type", "EMBEDDED_KV_STORE");

        // CSV source options, see https://spark.apache.org/docs/3.2.1/sql-data-sources-csv.html
        props.setProperty("hoodie.deltastreamer.source.dfs.root", String.valueOf(tabDumpParentPath));
        props.setProperty("hoodie.deltastreamer.csv.header", Boolean.toString(TisDataXHudiWriter.CSV_FILE_USE_HEADER));
        props.setProperty("hoodie.deltastreamer.csv.sep", String.valueOf(TisDataXHudiWriter.CSV_Column_Separator));
        props.setProperty("hoodie.deltastreamer.csv.nullValue", TisDataXHudiWriter.CSV_NULL_VALUE);
        props.setProperty("hoodie.deltastreamer.csv.escape", String.valueOf(TisDataXHudiWriter.CSV_ESCAPE_CHAR));

        // Source and target share the same schema file.
        props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", String.valueOf(fsSourceSchemaPath));
        props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", String.valueOf(fsSourceSchemaPath));

        // Hive sync options, please reference: DataSourceWriteOptions , HiveSyncConfig
        final IHiveConnGetter hiveMeta = this.hudiWriter.getHiveConnMeta();
        props.setProperty("hoodie.datasource.hive_sync.database", hiveMeta.getDbName());
        props.setProperty("hoodie.datasource.hive_sync.table", this.hudiTab.getName());

        // Partition field / extractor properties are supplied by the partition configuration.
        this.hudiTab.partition.setProps(props, this.hudiWriter);

        // Credentials are only written when the Hive connection provides a user token.
        Optional<HiveUserToken> hiveUserToken = hiveMeta.getUserToken();
        if (hiveUserToken.isPresent()) {
            HiveUserToken token = hiveUserToken.get();
            props.setProperty("hoodie.datasource.hive_sync.username", token.userName);
            props.setProperty("hoodie.datasource.hive_sync.password", token.password);
        }
        props.setProperty("hoodie.datasource.hive_sync.jdbcurl", hiveMeta.getJdbcUrl());
        props.setProperty("hoodie.datasource.hive_sync.mode", "jdbc");
        props.setProperty("hoodie.datasource.write.recordkey.field", this.hudiTab.recordField);
        props.store(write);
    } catch (IOException e) {
        // Report the file that actually failed to be written, plus the dump dir it describes.
        throw new RuntimeException("failed to write source props " + fsSourcePropsPath
                + " for CSV dump dir " + tabDumpParentPath, e);
    }
}
Also used : HiveUserToken(com.qlangtech.tis.config.hive.HiveUserToken) IPath(com.qlangtech.tis.fs.IPath) OutputStream(java.io.OutputStream) TypedPropertiesBuilder(com.alibaba.datax.plugin.writer.hudi.TypedPropertiesBuilder) IOException(java.io.IOException) IHiveConnGetter(com.qlangtech.tis.config.hive.IHiveConnGetter)

Example 3 with IPath

use of com.qlangtech.tis.fs.IPath in project plugins by qlangtech.

the class JoinHiveTask method processJoinTask.

/**
 * Processes the join table for this task: resolves the table's storage path on the file
 * system and delegates to {@code initializeHiveTable}, which decides whether the Hive table
 * has to be created or dropped and re-created.
 *
 * @param sql the insert-from-select statement whose target columns describe the join table
 * @throws RuntimeException wrapping any failure raised while parsing the SQL or initializing
 *                          the table, including a missing file system
 */
private void processJoinTask(String sql) {
    try {
        // Guard must come before the first dereference of fileSystem below; previously it ran
        // after fileSystem.getRootDir() and could never fire.
        if (fileSystem == null) {
            throw new IllegalStateException("fileSys can not be null");
        }
        final HiveInsertFromSelectParser insertParser = getSQLParserResult(sql);
        final Connection conn = this.getTaskContext().getObj();
        final EntityName dumpTable = EntityName.parse(this.getName());
        // Dots in the store path are turned into path separators.
        final String path = FSHistoryFileUtils.getJoinTableStorePath(fileSystem.getRootDir(), dumpTable).replaceAll("\\.", Path.SEPARATOR);
        IPath parent = fileSystem.getPath(path);
        initializeHiveTable(this.fileSystem, parent, mrEngine, HdfsFormat.DEFAULT_FORMAT, insertParser.getCols(), insertParser.getColsExcludePartitionCols(), conn, dumpTable, ITableDumpConstant.MAX_PARTITION_SAVE);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : EntityName(com.qlangtech.tis.sql.parser.tuple.creator.EntityName) IPath(com.qlangtech.tis.fs.IPath) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem) Connection(java.sql.Connection) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveInsertFromSelectParser(com.qlangtech.tis.hive.HiveInsertFromSelectParser)

Example 4 with IPath

use of com.qlangtech.tis.fs.IPath in project tis by qlangtech.

the class SnapshotDomain method writeResource2fs.

/**
 * Writes the content supplied by {@code getter} for this snapshot to the file system, at the
 * path the getter resolves for {@code coreName}. An existing file is overwritten.
 *
 * @param fs       target file system
 * @param coreName core name passed to {@code getter.getFsPath} to resolve the destination
 * @param getter   provides both the destination path and the bytes to write
 * @throws IllegalStateException if the file system cannot produce a path object for the destination
 * @throws RuntimeException      wrapping the {@link IOException} if creating, writing or closing
 *                               the destination stream fails
 */
public void writeResource2fs(ITISFileSystem fs, String coreName, PropteryGetter getter) {
    String path = getter.getFsPath(fs, coreName);
    IPath dst = fs.getPath(path);
    if (dst == null) {
        throw new IllegalStateException("path can not be create:" + path);
    }
    // try-with-resources instead of closeQuietly: a failure while closing (and flushing) the
    // stream must be reported, otherwise an incomplete write would look like a success.
    try (OutputStream dstoutput = fs.create(dst, true)) {
        IOUtils.write(getter.getContent(this), dstoutput);
    } catch (IOException e1) {
        throw new RuntimeException("[ERROR] Submit Service Core  Schema.xml to HDFS Failure !!!!", e1);
    }
}
Also used : IPath(com.qlangtech.tis.fs.IPath) OutputStream(java.io.OutputStream) IOException(java.io.IOException)

Example 5 with IPath

use of com.qlangtech.tis.fs.IPath in project plugins by qlangtech.

the class RemoveJoinHistoryDataTask method deleteHistoryJoinTable.

/**
 * Deletes historical data of a wide (join) table.
 * <p>
 * Lists the children of the table's storage directory, collects those whose name contains a
 * match of {@code ITISFileSystem.DATE_PATTERN}, and hands them to
 * {@code FSHistoryFileUtils.deleteOldHdfsfile} together with {@code partitionRetainNum}.
 * Does nothing when the storage directory does not exist.
 *
 * @param dumpTable          the join table whose history should be cleaned
 * @param fileSys            file system holding the table data; must not be {@code null}
 * @param partitionRetainNum retention count forwarded to {@code FSHistoryFileUtils.deleteOldHdfsfile}
 * @throws IllegalStateException if {@code fileSys} is {@code null}
 * @throws Exception             if a file system operation fails
 */
public static void deleteHistoryJoinTable(EntityName dumpTable, ITISFileSystem fileSys, Integer partitionRetainNum) throws Exception {
    // Guard must precede the first dereference; previously it ran after fileSys.getRootDir()
    // and a null argument surfaced as a NullPointerException instead.
    if (fileSys == null) {
        throw new IllegalStateException("fileSys can not be null");
    }
    // Dots in the store path are turned into path separators.
    final String path = FSHistoryFileUtils.getJoinTableStorePath(fileSys.getRootDir(), dumpTable).replaceAll("\\.", Path.SEPARATOR);
    IPath parent = fileSys.getPath(path);
    if (!fileSys.exists(parent)) {
        return;
    }
    List<PathInfo> timestampList = new ArrayList<>();
    for (IPathInfo c : fileSys.listChildren(parent)) {
        Matcher matcher = ITISFileSystem.DATE_PATTERN.matcher(c.getPath().getName());
        if (matcher.find()) {
            // The matched portion of the directory name is parsed as the timestamp.
            PathInfo pathinfo = new PathInfo();
            pathinfo.setPathName(c.getPath().getName());
            pathinfo.setTimeStamp(Long.parseLong(matcher.group()));
            timestampList.add(pathinfo);
        }
    }
    FSHistoryFileUtils.deleteOldHdfsfile(fileSys, parent, timestampList, partitionRetainNum);
}
Also used : IPathInfo(com.qlangtech.tis.fs.IPathInfo) IPath(com.qlangtech.tis.fs.IPath) Matcher(java.util.regex.Matcher) ITISFileSystem(com.qlangtech.tis.fs.ITISFileSystem) ArrayList(java.util.ArrayList) PathInfo(com.qlangtech.tis.fs.FSHistoryFileUtils.PathInfo) IPathInfo(com.qlangtech.tis.fs.IPathInfo) PathInfo(com.qlangtech.tis.fs.FSHistoryFileUtils.PathInfo) FSHistoryFileUtils(com.qlangtech.tis.fs.FSHistoryFileUtils)

Aggregations

IPath (com.qlangtech.tis.fs.IPath)9 ITISFileSystem (com.qlangtech.tis.fs.ITISFileSystem)6 OutputStream (java.io.OutputStream)4 IOException (java.io.IOException)3 HdfsColMeta (com.alibaba.datax.plugin.writer.hdfswriter.HdfsColMeta)2 IHiveConnGetter (com.qlangtech.tis.config.hive.IHiveConnGetter)2 IPathInfo (com.qlangtech.tis.fs.IPathInfo)2 FileSystemFactory (com.qlangtech.tis.offline.FileSystemFactory)2 DataType (com.qlangtech.tis.plugin.ds.DataType)2 ISelectedTab (com.qlangtech.tis.plugin.ds.ISelectedTab)2 Schema (org.apache.avro.Schema)2 SchemaBuilder (org.apache.avro.SchemaBuilder)2 Configuration (com.alibaba.datax.common.util.Configuration)1 HdfsWriterErrorCode (com.alibaba.datax.plugin.writer.hdfswriter.HdfsWriterErrorCode)1 Key (com.alibaba.datax.plugin.writer.hdfswriter.Key)1 TypedPropertiesBuilder (com.alibaba.datax.plugin.writer.hudi.TypedPropertiesBuilder)1 HiveUserToken (com.qlangtech.tis.config.hive.HiveUserToken)1 IDataxProcessor (com.qlangtech.tis.datax.IDataxProcessor)1 DataxProcessor (com.qlangtech.tis.datax.impl.DataxProcessor)1 FSHistoryFileUtils (com.qlangtech.tis.fs.FSHistoryFileUtils)1