Search in sources :

Example 26 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

The class JobContainerTest, method testPostHandlerByReader.

@Test
public void testPostHandlerByReader() throws Exception {
    // Work on a clone so the shared test configuration is never mutated.
    Configuration copyConfig = this.configuration.clone();
    copyConfig.set(CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINTYPE, "reader");
    copyConfig.set(CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINNAME, "fakereader");
    JobContainer jobContainer = new JobContainer(copyConfig);
    // postHandle() is private, so invoke it reflectively.
    Method postHandle = JobContainer.class.getDeclaredMethod("postHandle");
    postHandle.setAccessible(true);
    postHandle.invoke(jobContainer);
    System.out.println(copyConfig.get("job.postHandler.test"));
    // The fake reader's post handler is expected to record this marker value.
    Assert.assertEquals("readPostDone", copyConfig.get("job.postHandler.test"));
}
Also used : JobContainer(com.alibaba.datax.core.job.JobContainer) Configuration(com.alibaba.datax.common.util.Configuration) Method(java.lang.reflect.Method) Test(org.junit.Test)

Example 27 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

The class TaskGroupContainerTest, method testRealTransformer.

@Test
public void testRealTransformer() {
    LoadUtil.bind(configurationRealTransformer);
    // BUGFIX: the first set() previously wrote FakeOneReader to the *writer* key
    // ("plugin.writer.fakewriter.class") and was immediately overwritten by the
    // next line — a dead store. The reader class belongs under the reader key.
    this.configurationRealTransformer.set("plugin.reader.fakereader.class", FakeOneReader.class.getName());
    this.configurationRealTransformer.set("plugin.writer.fakewriter.class", FakeLongTimeWriter.class.getName());
    this.configurationRealTransformer.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL, 1);
    // Trim the job content down to a single task so the task group runs exactly one slice.
    Configuration jobContent = this.configurationRealTransformer.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT).get(0);
    List<Configuration> jobContents = new ArrayList<Configuration>();
    jobContents.add(jobContent);
    this.configurationRealTransformer.set(CoreConstant.DATAX_JOB_CONTENT, jobContents);
    TaskGroupContainer taskGroupContainer = new TaskGroupContainer(this.configurationRealTransformer);
    taskGroupContainer.start();
    Assert.assertTrue(State.SUCCEEDED == taskGroupContainer.getContainerCommunicator().collect().getState());
    Communication res = null;
    try {
        // reportTaskGroupCommunication is private; invoke it reflectively to
        // obtain the merged task-group communication snapshot.
        Method com = TaskGroupContainer.class.getDeclaredMethod("reportTaskGroupCommunication", Communication.class, int.class);
        com.setAccessible(true);
        res = (Communication) com.invoke(taskGroupContainer, new Communication(), 1);
        System.out.println("TaskGroup => " + CommunicationTool.Stringify.getSnapshot(res));
    } catch (Exception e) {
        e.printStackTrace();
    }
    Assert.assertNotNull(res);
    // JUnit convention: assertEquals(expected, actual) — the original had the
    // arguments reversed, which produces confusing failure messages.
    Assert.assertEquals(30L, res.getLongCounter(CommunicationTool.TOTAL_READ_RECORDS).longValue());
    Assert.assertEquals(10L, res.getLongCounter(CommunicationTool.TRANSFORMER_SUCCEED_RECORDS).longValue());
    Assert.assertEquals(0L, res.getLongCounter(CommunicationTool.TRANSFORMER_FAILED_RECORDS).longValue());
    Assert.assertEquals(10L, res.getLongCounter(CommunicationTool.TRANSFORMER_FILTER_RECORDS).longValue());
    Assert.assertTrue(res.getLongCounter(CommunicationTool.TRANSFORMER_USED_TIME).longValue() > 0);
}
Also used : TaskGroupContainer(com.alibaba.datax.core.taskgroup.TaskGroupContainer) Configuration(com.alibaba.datax.common.util.Configuration) ArrayList(java.util.ArrayList) Method(java.lang.reflect.Method) Communication(com.alibaba.datax.core.statistics.communication.Communication) Test(org.junit.Test)

Example 28 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

The class Entry, method doSplit.

/**
 * Splits the original reader slice configuration into {@code adviceNumber}
 * task configurations, each carrying its own skip-count / batch-size window
 * computed from the target collection.
 */
public static List<Configuration> doSplit(Configuration originalSliceConfig, int adviceNumber, MongoClient mongoClient) {
    String dbName = originalSliceConfig.getString(KeyConstant.MONGO_DB_NAME);
    String collName = originalSliceConfig.getString(KeyConstant.MONGO_COLLECTION_NAME);
    // Fail fast when the client or any mandatory setting is missing.
    boolean invalid = mongoClient == null
            || Strings.isNullOrEmpty(dbName)
            || Strings.isNullOrEmpty(collName);
    if (invalid) {
        throw DataXException.asDataXException(MongoDBReaderErrorCode.ILLEGAL_VALUE, MongoDBReaderErrorCode.ILLEGAL_VALUE.getDescription());
    }
    String query = originalSliceConfig.getString(KeyConstant.MONGO_QUERY);
    MongoDatabase db = mongoClient.getDatabase(dbName);
    MongoCollection collection = db.getCollection(collName);
    // One cloned configuration per computed interval; each clone records where
    // its task should start reading (skip count) and how many documents to read.
    List<Configuration> confList = new ArrayList<Configuration>();
    for (Entry interval : doSplitInterval(adviceNumber, collection, query)) {
        Configuration sliceConf = originalSliceConfig.clone();
        sliceConf.set(KeyConstant.SKIP_COUNT, interval.interval);
        sliceConf.set(KeyConstant.BATCH_SIZE, interval.batchSize);
        confList.add(sliceConf);
    }
    return confList;
}
Also used : MongoCollection(com.mongodb.client.MongoCollection) Configuration(com.alibaba.datax.common.util.Configuration) ArrayList(java.util.ArrayList) MongoDatabase(com.mongodb.client.MongoDatabase)

Example 29 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

The class UnstructuredStorageReaderUtil, method readFromStream.

/**
 * Wraps the raw input stream in the decompressor matching the configured
 * {@code compress} value (none, lzo, lzo_deflate, gzip, bzip2, hadoop-snappy,
 * framing-snappy, zip), then delegates line reading to
 * {@link #doReadFromStream}. The reader is always closed on exit.
 *
 * @param inputStream         raw source stream (ownership is taken; closed via the reader)
 * @param context             human-readable source description used in error messages
 * @param readerSliceConfig   slice config providing compress/encoding/column/bufferSize
 * @param recordSender        sink for parsed records
 * @param taskPluginCollector collector for dirty records
 */
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding
    if (StringUtils.isBlank(encoding)) {
        // BUGFIX: warn *before* overwriting `encoding` — the original code reset
        // the variable first, so the warning always printed the default value
        // twice and lost the user's actual (blank) setting.
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // Select the decompression wrapper; every branch ends with `reader` assigned
    // or an exception thrown for unsupported formats.
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy" + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
    } catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) Configuration(com.alibaba.datax.common.util.Configuration) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(io.airlift.compress.snappy.SnappyCodec)

Example 30 with Configuration

use of com.alibaba.datax.common.util.Configuration in project DataX by alibaba.

The class UnstructuredStorageWriterUtil, method split.

/**
 * Produces {@code mandatoryNumber} writer task configurations, each a clone of
 * {@code writerSliceConfig} with a unique target file name of the form
 * {@code <prefix>__<uuid>} that does not collide with any name in
 * {@code originAllFileExists} or with the other generated names.
 *
 * @param writerSliceConfig   writer slice configuration to clone per task
 * @param originAllFileExists file names already present at the destination (not mutated)
 * @param mandatoryNumber     number of task configurations to produce
 * @return one configuration per task, {@code Key.FILE_NAME} set to a unique name
 */
public static List<Configuration> split(Configuration writerSliceConfig, Set<String> originAllFileExists, int mandatoryNumber) {
    LOG.info("begin do split...");
    // Defensive copy: never mutate the caller's set of existing file names.
    Set<String> allFileExists = new HashSet<String>(originAllFileExists);
    // Presize — the result always holds exactly mandatoryNumber entries.
    List<Configuration> writerSplitConfigs = new ArrayList<Configuration>(mandatoryNumber);
    String filePrefix = writerSliceConfig.getString(Key.FILE_NAME);
    for (int i = 0; i < mandatoryNumber; i++) {
        Configuration splitedTaskConfig = writerSliceConfig.clone();
        // Append a random suffix; retry until the full name is unique across
        // both pre-existing files and names generated for earlier tasks.
        String fullFileName;
        do {
            String fileSuffix = UUID.randomUUID().toString().replace('-', '_');
            fullFileName = String.format("%s__%s", filePrefix, fileSuffix);
        } while (allFileExists.contains(fullFileName));
        allFileExists.add(fullFileName);
        splitedTaskConfig.set(Key.FILE_NAME, fullFileName);
        // SLF4J parameterized logging — avoids eager String.format in the loop.
        LOG.info("splited write file name:[{}]", fullFileName);
        writerSplitConfigs.add(splitedTaskConfig);
    }
    LOG.info("end do split.");
    return writerSplitConfigs;
}
Also used : Configuration(com.alibaba.datax.common.util.Configuration) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Aggregations

Configuration (com.alibaba.datax.common.util.Configuration)82 ArrayList (java.util.ArrayList)27 Test (org.junit.Test)19 Communication (com.alibaba.datax.core.statistics.communication.Communication)13 DataXException (com.alibaba.datax.common.exception.DataXException)9 Method (java.lang.reflect.Method)8 Record (com.alibaba.datax.common.element.Record)7 JobContainer (com.alibaba.datax.core.job.JobContainer)6 IOException (java.io.IOException)5 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)5 LongColumn (com.alibaba.datax.common.element.LongColumn)4 TaskPluginCollector (com.alibaba.datax.common.plugin.TaskPluginCollector)4 TaskGroupContainer (com.alibaba.datax.core.taskgroup.TaskGroupContainer)4 Channel (com.alibaba.datax.core.transport.channel.Channel)4 MemoryChannel (com.alibaba.datax.core.transport.channel.memory.MemoryChannel)4 DefaultRecord (com.alibaba.datax.core.transport.record.DefaultRecord)4 File (java.io.File)4 HashSet (java.util.HashSet)3 List (java.util.List)3 VMInfo (com.alibaba.datax.common.statistics.VMInfo)2