use of org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory in project asterixdb by apache.
the class CountOfCountsTest method countOfCountsExternalSortMultiNC.
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
ResultSetId rsId = new ResultSetId(1);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, sorter, 0);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, sorter, 0, group, 0);
IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn3, group, 0, sorter2, 0);
IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn4, sorter2, 0, group2, 0);
IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(conn5, group2, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory in project asterixdb by apache.
the class PushRuntimeTest method scanSelectWrite.
@Test
public void scanSelectWrite() throws Exception {
JobSpecification spec = new JobSpecification(FRAME_SIZE);
// the scanner
FileSplit[] intFileSplits = new FileSplit[1];
intFileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "simple" + File.separator + "int-part1.tbl");
IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(intFileSplits);
RecordDescriptor intScannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE };
FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), intScannerDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
// the algebricks op.
IScalarEvaluatorFactory cond = new IntegerGreaterThanEvalFactory(new IntegerConstantEvalFactory(2), new TupleFieldEvaluatorFactory(0));
StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 0 }, BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
RecordDescriptor selectDesc = intScannerDesc;
String filePath = PATH_ACTUAL + SEPARATOR + "scanSelectWrite.out";
File outFile = new File(filePath);
SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, selectDesc);
AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { select, writer }, new RecordDescriptor[] { selectDesc, null });
PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, algebricksOp, 0);
spec.addRoot(algebricksOp);
AlgebricksHyracksIntegrationUtil.runJob(spec);
StringBuilder buf = new StringBuilder();
readFileToString(outFile, buf);
Assert.assertEquals("0", buf.toString());
outFile.delete();
}
use of org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory in project asterixdb by apache.
the class PushRuntimeTest method scanSplitWrite.
@Test
public void scanSplitWrite() throws Exception {
final int outputArity = 2;
JobSpecification spec = new JobSpecification(FRAME_SIZE);
String[] inputFileName = { "data" + File.separator + "simple" + File.separator + "int-string-part1.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-0.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-1.tbl" };
File[] inputFiles = new File[inputFileName.length];
for (int i = 0; i < inputFileName.length; i++) {
inputFiles[i] = new File(inputFileName[i]);
}
File[] outputFile = new File[outputArity];
FileSplit[] outputFileSplit = new FileSplit[outputArity];
for (int i = 0; i < outputArity; i++) {
outputFileSplit[i] = createFile(AlgebricksHyracksIntegrationUtil.nc1);
outputFile[i] = outputFileSplit[i].getFile(AlgebricksHyracksIntegrationUtil.nc1.getIoManager());
}
FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName[0]) };
IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(inputSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
SplitOperatorDescriptor splitOp = new SplitOperatorDescriptor(spec, scannerDesc, outputArity, new TupleFieldEvaluatorFactory(0), BinaryIntegerInspectorImpl.FACTORY);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, splitOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputFile.length];
for (int i = 0; i < outputArity; i++) {
outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { outputFileSplit[i] });
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
}
spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, splitOp, 0);
for (int i = 0; i < outputArity; i++) {
spec.connect(new OneToOneConnectorDescriptor(spec), splitOp, i, outputOp[i], 0);
}
for (int i = 0; i < outputArity; i++) {
spec.addRoot(outputOp[i]);
}
AlgebricksHyracksIntegrationUtil.runJob(spec);
for (int i = 0; i < outputArity; i++) {
compareFiles("data" + File.separator + "device0" + File.separator + inputFileName[i + 1], outputFile[i].getAbsolutePath());
}
}
use of org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory in project asterixdb by apache.
the class PushRuntimeTest method scanLimitWrite.
@Test
public void scanLimitWrite() throws Exception {
JobSpecification spec = new JobSpecification(FRAME_SIZE);
// the scanner
FileSplit[] fileSplits = new FileSplit[1];
fileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl");
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanner, DEFAULT_NODES);
// the algebricks op.
StreamLimitRuntimeFactory limit = new StreamLimitRuntimeFactory(new IntegerConstantEvalFactory(2), null, new int[] { 0 }, BinaryIntegerInspectorImpl.FACTORY);
RecordDescriptor limitDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
String filePath = PATH_ACTUAL + SEPARATOR + "scanLimitWrite.out";
File outFile = new File(filePath);
SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, limitDesc);
AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { limit, writer }, new RecordDescriptor[] { limitDesc, null });
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, algebricksOp, 0);
spec.addRoot(algebricksOp);
AlgebricksHyracksIntegrationUtil.runJob(spec);
StringBuilder buf = new StringBuilder();
readFileToString(outFile, buf);
Assert.assertEquals("12", buf.toString());
outFile.delete();
}
use of org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory in project asterixdb by apache.
the class PushRuntimeTest method etsAssignScriptWrite.
@Test
public void etsAssignScriptWrite() throws Exception {
JobSpecification spec = new JobSpecification(FRAME_SIZE);
IntegerConstantEvalFactory const1 = new IntegerConstantEvalFactory(400);
IntegerConstantEvalFactory const2 = new IntegerConstantEvalFactory(3);
EmptyTupleSourceRuntimeFactory ets = new EmptyTupleSourceRuntimeFactory();
RecordDescriptor etsDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
AssignRuntimeFactory assign = new AssignRuntimeFactory(new int[] { 0, 1 }, new IScalarEvaluatorFactory[] { const1, const2 }, new int[] { 0, 1 });
RecordDescriptor assignDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
IValueParserFactory[] valueParsers = { IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE };
String osname = System.getProperty("os.name");
String command;
if (osname.equals("Linux")) {
command = "bash target/testscripts/idscript";
} else if (osname.startsWith("Windows")) {
command = "target\\testscripts\\idscript.cmd";
} else {
// don't know how to test
return;
}
StringStreamingRuntimeFactory script = new StringStreamingRuntimeFactory(command, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, ' ', new DelimitedDataTupleParserFactory(valueParsers, ' '));
RecordDescriptor scriptDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
String filePath = PATH_ACTUAL + SEPARATOR + "etsAssignScriptWrite.out";
File outFile = new File(filePath);
SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0, 1 }, new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE, IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE, scriptDesc);
AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 0, 0, new IPushRuntimeFactory[] { ets, assign, script, writer }, new RecordDescriptor[] { etsDesc, assignDesc, scriptDesc, null });
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, algebricksOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
spec.addRoot(algebricksOp);
AlgebricksHyracksIntegrationUtil.runJob(spec);
StringBuilder buf = new StringBuilder();
readFileToString(outFile, buf);
Assert.assertEquals("400; 3", buf.toString());
outFile.delete();
}
Aggregations