Usage example of com.alibaba.datax.common.util.Configuration in the DataX project (alibaba).
From class JobContainerTest, method testPostHandlerByReader:
/**
 * Verifies that JobContainer.postHandle() dispatches to the reader-side post
 * handler: configures the handler as the "fakereader" reader plugin, invokes
 * the private postHandle() via reflection, and asserts that the handler left
 * its marker value in the job configuration.
 *
 * @throws Exception if reflective lookup or invocation fails
 */
@Test
public void testPostHandlerByReader() throws Exception {
    Configuration copyConfig = this.configuration.clone();
    copyConfig.set(CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINTYPE, "reader");
    copyConfig.set(CoreConstant.DATAX_JOB_POSTHANDLER_PLUGINNAME, "fakereader");
    JobContainer jobContainer = new JobContainer(copyConfig);
    // postHandle() is private, so reach it via reflection.
    Method postHandleMethod = jobContainer.getClass().getDeclaredMethod("postHandle");
    postHandleMethod.setAccessible(true);
    postHandleMethod.invoke(jobContainer);
    // The fake reader's post handler is expected to write this marker.
    Assert.assertEquals("readPostDone", copyConfig.get("job.postHandler.test"));
}
Usage example of com.alibaba.datax.common.util.Configuration in the DataX project (alibaba).
From class TaskGroupContainerTest, method testRealTransformer:
/**
 * End-to-end check that a single-channel task group pushes records through a
 * real transformer: 30 records read in total, 10 transformed successfully,
 * 10 filtered, 0 failed, and a positive transformer processing time.
 */
@Test
public void testRealTransformer() {
    LoadUtil.bind(configurationRealTransformer);
    // Bind the fake reader and writer used by this scenario. The original code
    // set "plugin.writer.fakewriter.class" twice (first to a *reader* class,
    // then overwrote it); the first assignment is the reader plugin binding.
    this.configurationRealTransformer.set("plugin.reader.fakereader.class", FakeOneReader.class.getName());
    this.configurationRealTransformer.set("plugin.writer.fakewriter.class", FakeLongTimeWriter.class.getName());
    this.configurationRealTransformer.set(CoreConstant.DATAX_CORE_CONTAINER_TASKGROUP_CHANNEL, 1);
    // Keep only the first job-content entry so exactly one task executes.
    Configuration jobContent = this.configurationRealTransformer.getListConfiguration(CoreConstant.DATAX_JOB_CONTENT).get(0);
    List<Configuration> jobContents = new ArrayList<Configuration>();
    jobContents.add(jobContent);
    this.configurationRealTransformer.set(CoreConstant.DATAX_JOB_CONTENT, jobContents);

    TaskGroupContainer taskGroupContainer = new TaskGroupContainer(this.configurationRealTransformer);
    taskGroupContainer.start();
    Assert.assertEquals(State.SUCCEEDED, taskGroupContainer.getContainerCommunicator().collect().getState());

    Communication res = null;
    try {
        // reportTaskGroupCommunication is private; invoke it reflectively.
        Method report = TaskGroupContainer.class.getDeclaredMethod("reportTaskGroupCommunication", Communication.class, int.class);
        report.setAccessible(true);
        res = (Communication) report.invoke(taskGroupContainer, new Communication(), 1);
        System.out.println("TaskGroup => " + CommunicationTool.Stringify.getSnapshot(res));
    } catch (Exception e) {
        // Fail loudly instead of only printing the stack trace and relying on
        // the null check below to surface the problem.
        Assert.fail("reportTaskGroupCommunication invocation failed: " + e);
    }
    Assert.assertNotNull(res);
    // JUnit's assertEquals takes the expected value first.
    Assert.assertEquals(30L, res.getLongCounter(CommunicationTool.TOTAL_READ_RECORDS).longValue());
    Assert.assertEquals(10L, res.getLongCounter(CommunicationTool.TRANSFORMER_SUCCEED_RECORDS).longValue());
    Assert.assertEquals(0L, res.getLongCounter(CommunicationTool.TRANSFORMER_FAILED_RECORDS).longValue());
    Assert.assertEquals(10L, res.getLongCounter(CommunicationTool.TRANSFORMER_FILTER_RECORDS).longValue());
    Assert.assertTrue(res.getLongCounter(CommunicationTool.TRANSFORMER_USED_TIME).longValue() > 0);
}
Usage example of com.alibaba.datax.common.util.Configuration in the DataX project (alibaba).
From the MongoDB reader's split Entry class, method doSplit:
/**
 * Splits the original MongoDB reader configuration into per-task slices.
 * Each slice is a clone of the original configuration carrying the skip
 * offset and batch size of one interval computed over the target collection.
 *
 * @param originalSliceConfig reader configuration to clone for every slice
 * @param adviceNumber        suggested number of splits
 * @param mongoClient         connected Mongo client used to inspect the collection
 * @return one configuration per computed interval
 * @throws DataXException if the db/collection name is missing or the client is null
 */
public static List<Configuration> doSplit(Configuration originalSliceConfig, int adviceNumber, MongoClient mongoClient) {
    String database = originalSliceConfig.getString(KeyConstant.MONGO_DB_NAME);
    String collName = originalSliceConfig.getString(KeyConstant.MONGO_COLLECTION_NAME);
    boolean missingTarget = Strings.isNullOrEmpty(database) || Strings.isNullOrEmpty(collName);
    if (missingTarget || mongoClient == null) {
        throw DataXException.asDataXException(MongoDBReaderErrorCode.ILLEGAL_VALUE, MongoDBReaderErrorCode.ILLEGAL_VALUE.getDescription());
    }
    String query = originalSliceConfig.getString(KeyConstant.MONGO_QUERY);
    MongoCollection collection = mongoClient.getDatabase(database).getCollection(collName);
    // One cloned configuration per interval; each slice reads its own window.
    List<Configuration> slices = new ArrayList<Configuration>();
    for (Entry interval : doSplitInterval(adviceNumber, collection, query)) {
        Configuration slice = originalSliceConfig.clone();
        slice.set(KeyConstant.SKIP_COUNT, interval.interval);
        slice.set(KeyConstant.BATCH_SIZE, interval.batchSize);
        slices.add(slice);
    }
    return slices;
}
Usage example of com.alibaba.datax.common.util.Configuration in the DataX project (alibaba).
From class UnstructuredStorageReaderUtil, method readFromStream:
/**
 * Reads text records from {@code inputStream}, transparently decompressing
 * according to the slice configuration's "compress" setting, and hands the
 * resulting reader to {@link #doReadFromStream}.
 *
 * @param inputStream         raw byte stream of the file being read
 * @param context             file description used in error messages
 * @param readerSliceConfig   slice configuration (compress, encoding, column, bufferSize)
 * @param recordSender        sink for parsed records
 * @param taskPluginCollector collector for dirty records
 * @throws DataXException on unsupported encoding/compression or I/O failure
 */
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    // Treat blank compress the same as absent (no decompression).
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    if (StringUtils.isBlank(encoding)) {
        // Warn with the user-supplied (blank) value BEFORE replacing it; the
        // original code overwrote encoding first, so the warning always
        // reported the default as what the user had configured.
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // ["*"] means "all columns": normalize to null so downstream code treats
    // it as an unspecified column list.
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    BufferedReader reader = null;
    try {
        reader = buildReader(inputStream, compress, encoding, bufferSize);
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
    } catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}

/**
 * Wraps {@code inputStream} in the decompressor selected by {@code compress}
 * (null means plain text) and returns a buffered character reader.
 * Formerly commented-out formats (xz, ar, arj, cpio, dump, jar, tar) remain
 * unsupported and fall through to the ILLEGAL_VALUE error.
 *
 * @throws IOException                  if a decompressor fails to open the stream
 * @throws UnsupportedEncodingException if {@code encoding} is unknown
 * @throws DataXException               if {@code compress} names an unsupported format
 */
private static BufferedReader buildReader(InputStream inputStream, String compress, String encoding, int bufferSize) throws IOException {
    if (null == compress) {
        return new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
    }
    if ("lzo_deflate".equalsIgnoreCase(compress)) {
        LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
        return new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
    }
    if ("lzo".equalsIgnoreCase(compress)) {
        LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
    }
    if ("gzip".equalsIgnoreCase(compress)) {
        CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
    }
    if ("bzip2".equalsIgnoreCase(compress)) {
        CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
    }
    if ("hadoop-snappy".equalsIgnoreCase(compress)) {
        CompressionCodec snappyCodec = new SnappyCodec();
        InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
    }
    if ("framing-snappy".equalsIgnoreCase(compress)) {
        InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
    }
    if ("zip".equalsIgnoreCase(compress)) {
        ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
        return new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
    }
    throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy" + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
}
Usage example of com.alibaba.datax.common.util.Configuration in the DataX project (alibaba).
From class UnstructuredStorageWriterUtil, method split:
/**
 * Fans the writer configuration out into {@code mandatoryNumber} task slices,
 * giving each slice a unique file name of the form "&lt;prefix&gt;__&lt;uuid&gt;"
 * that collides neither with pre-existing destination files nor with names
 * assigned to earlier slices.
 *
 * @param writerSliceConfig   writer configuration to clone for every task
 * @param originAllFileExists file names already present at the destination (not mutated)
 * @param mandatoryNumber     number of task slices to produce
 * @return one cloned configuration per task, each with a unique FILE_NAME
 */
public static List<Configuration> split(Configuration writerSliceConfig, Set<String> originAllFileExists, int mandatoryNumber) {
    LOG.info("begin do split...");
    // Work on a copy so the caller's set is not mutated as names are reserved.
    Set<String> allFileExists = new HashSet<String>(originAllFileExists);
    List<Configuration> writerSplitConfigs = new ArrayList<Configuration>(mandatoryNumber);
    String filePrefix = writerSliceConfig.getString(Key.FILE_NAME);
    for (int i = 0; i < mandatoryNumber; i++) {
        Configuration splitedTaskConfig = writerSliceConfig.clone();
        // Regenerate until the name is unique; the original duplicated the
        // generate-then-format code before and inside the while loop.
        String fullFileName;
        do {
            String fileSuffix = UUID.randomUUID().toString().replace('-', '_');
            fullFileName = String.format("%s__%s", filePrefix, fileSuffix);
        } while (allFileExists.contains(fullFileName));
        allFileExists.add(fullFileName);
        splitedTaskConfig.set(Key.FILE_NAME, fullFileName);
        LOG.info(String.format("splited write file name:[%s]", fullFileName));
        writerSplitConfigs.add(splitedTaskConfig);
    }
    LOG.info("end do split.");
    return writerSplitConfigs;
}
Aggregations