use of org.apache.hyracks.hdfs.lib.TextTupleWriterFactory in project asterixdb by apache.
the class DataflowTest method testHDFSReadWriteOperators.
/**
* Test a job with only HDFS read and writes.
*
* @throws Exception
*/
public void testHDFSReadWriteOperators() throws Exception {
FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
conf.setInputFormat(TextInputFormat.class);
Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
InputSplit[] splits = conf.getInputFormat().getSplits(conf, numberOfNC * 4);
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }), new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null), sortOperator, 0, writeOperator, 0);
jobSpec.addRoot(writeOperator);
IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
JobId jobId = client.startJob(jobSpec);
client.waitForCompletion(jobId);
Assert.assertEquals(true, checkResults());
}
use of org.apache.hyracks.hdfs.lib.TextTupleWriterFactory in project asterixdb by apache.
the class DataflowTest method testHDFSReadWriteOperators.
/**
* Test a job with only HDFS read and writes.
*
* @throws Exception
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testHDFSReadWriteOperators() throws Exception {
FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
conf.setInputFormatClass(TextInputFormat.class);
Scheduler scheduler = new Scheduler(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
InputFormat inputFormat = ReflectionUtils.newInstance(conf.getInputFormatClass(), getConfiguration());
List<InputSplit> splits = inputFormat.getSplits(conf);
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID, HyracksUtils.NC2_ID };
HDFSReadOperatorDescriptor readOperator = new HDFSReadOperatorDescriptor(jobSpec, recordDesc, conf, splits, readSchedule, new TextKeyValueParserFactory());
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, locations);
ExternalSortOperatorDescriptor sortOperator = new ExternalSortOperatorDescriptor(jobSpec, 10, new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, recordDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, sortOperator, locations);
HDFSWriteOperatorDescriptor writeOperator = new HDFSWriteOperatorDescriptor(jobSpec, conf, new TextTupleWriterFactory());
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, writeOperator, HyracksUtils.NC1_ID);
jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), readOperator, 0, sortOperator, 0);
jobSpec.connect(new MToNPartitioningMergingConnectorDescriptor(jobSpec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { RawBinaryHashFunctionFactory.INSTANCE }), new int[] { 0 }, new IBinaryComparatorFactory[] { RawBinaryComparatorFactory.INSTANCE }, null), sortOperator, 0, writeOperator, 0);
jobSpec.addRoot(writeOperator);
IHyracksClientConnection client = new HyracksConnection(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT);
JobId jobId = client.startJob(jobSpec);
client.waitForCompletion(jobId);
Assert.assertEquals(true, checkResults());
}
Aggregations