use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class CountOfCountsTest method countOfCountsMultiNC.
@Test
public void countOfCountsMultiNC() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
InMemorySortOperatorDescriptor sorter = new InMemorySortOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
ResultSetId rsId = new ResultSetId(1);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, sorter, 0);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, sorter, 0, group, 0);
IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn3, group, 0, sorter2, 0);
IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn4, sorter2, 0, group2, 0);
IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(conn5, group2, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class CountOfCountsTest method countOfCountsExternalSortMultiNC.
@Test
public void countOfCountsExternalSortMultiNC() throws Exception {
JobSpecification spec = new JobSpecification();
FileSplit[] splits = new FileSplit[] { new ManagedFileSplit(NC2_ID, "data" + File.separator + "words.txt") };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, ','), desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, 3, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter2, NC1_ID, NC2_ID);
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 }, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
ResultSetId rsId = new ResultSetId(1);
IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true, false, ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, sorter, 0);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, sorter, 0, group, 0);
IConnectorDescriptor conn3 = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(new int[] { 1 }, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn3, group, 0, sorter2, 0);
IConnectorDescriptor conn4 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn4, sorter2, 0, group2, 0);
IConnectorDescriptor conn5 = new MToNBroadcastConnectorDescriptor(spec);
spec.connect(conn5, group2, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class RebalanceUtil method populateDataToRebalanceTarget.
// Populates the data from the source dataset to the rebalance target dataset.
private static void populateDataToRebalanceTarget(Dataset source, Dataset target, MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
JobSpecification spec = new JobSpecification();
JobId jobId = JobIdFactory.generateJobId();
JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
spec.setJobletEventListenerFactory(jobEventListenerFactory);
// The pipeline starter.
IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
// Creates primary index scan op.
IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source, jobId);
// Creates secondary BTree upsert op.
IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
// The final commit operator.
IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, jobId, target);
// Connects empty-tuple-source and scan.
spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
// Connects scan and upsert.
int numKeys = target.getPrimaryKeys().size();
int[] keys = IntStream.range(0, numKeys).toArray();
IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
// Connects upsert and sink.
spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);
// Executes the job.
JobUtils.runJob(hcc, spec, true);
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class AbstractPhysicalOperator method buildPipelineWithProjection.
private AlgebricksPipeline buildPipelineWithProjection(ILogicalPlan p, IOperatorSchema outerPlanSchema, AbstractOperatorWithNestedPlans npOp, IOperatorSchema opSchema, PlanCompiler pc) throws AlgebricksException {
if (p.getRoots().size() > 1) {
throw new NotImplementedException("Nested plans with several roots are not supported.");
}
JobSpecification nestedJob = pc.compilePlan(p, outerPlanSchema, null);
ILogicalOperator topOpInSubplan = p.getRoots().get(0).getValue();
JobGenContext context = pc.getContext();
IOperatorSchema topOpInSubplanScm = context.getSchema(topOpInSubplan);
opSchema.addAllVariables(topOpInSubplanScm);
Map<OperatorDescriptorId, IOperatorDescriptor> opMap = nestedJob.getOperatorMap();
if (opMap.size() != 1) {
throw new AlgebricksException("Attempting to construct a nested plan with " + opMap.size() + " operator descriptors. Currently, nested plans can only consist in linear pipelines of Asterix micro operators.");
}
for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> opEntry : opMap.entrySet()) {
IOperatorDescriptor opd = opEntry.getValue();
if (!(opd instanceof AlgebricksMetaOperatorDescriptor)) {
throw new AlgebricksException("Can only generate Hyracks jobs for pipelinable Asterix nested plans, not for " + opd.getClass().getName());
}
AlgebricksMetaOperatorDescriptor amod = (AlgebricksMetaOperatorDescriptor) opd;
return amod.getPipeline();
// we suppose that the top operator in the subplan already does the
// projection for us
}
throw new IllegalStateException();
}
use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in project asterixdb by apache.
the class PushRuntimeTest method scanSplitWrite.
@Test
public void scanSplitWrite() throws Exception {
final int outputArity = 2;
JobSpecification spec = new JobSpecification(FRAME_SIZE);
String[] inputFileName = { "data" + File.separator + "simple" + File.separator + "int-string-part1.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-0.tbl", "data" + File.separator + "simple" + File.separator + "int-string-part1-split-1.tbl" };
File[] inputFiles = new File[inputFileName.length];
for (int i = 0; i < inputFileName.length; i++) {
inputFiles[i] = new File(inputFileName[i]);
}
File[] outputFile = new File[outputArity];
FileSplit[] outputFileSplit = new FileSplit[outputArity];
for (int i = 0; i < outputArity; i++) {
outputFileSplit[i] = createFile(AlgebricksHyracksIntegrationUtil.nc1);
outputFile[i] = outputFileSplit[i].getFile(AlgebricksHyracksIntegrationUtil.nc1.getIoManager());
}
FileSplit[] inputSplits = new FileSplit[] { new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID, inputFileName[0]) };
IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(inputSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider, new DelimitedDataTupleParserFactory(valueParsers, '|'), scannerDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
SplitOperatorDescriptor splitOp = new SplitOperatorDescriptor(spec, scannerDesc, outputArity, new TupleFieldEvaluatorFactory(0), BinaryIntegerInspectorImpl.FACTORY);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, splitOp, new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputFile.length];
for (int i = 0; i < outputArity; i++) {
outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { outputFileSplit[i] });
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], new String[] { AlgebricksHyracksIntegrationUtil.NC1_ID });
}
spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, splitOp, 0);
for (int i = 0; i < outputArity; i++) {
spec.connect(new OneToOneConnectorDescriptor(spec), splitOp, i, outputOp[i], 0);
}
for (int i = 0; i < outputArity; i++) {
spec.addRoot(outputOp[i]);
}
AlgebricksHyracksIntegrationUtil.runJob(spec);
for (int i = 0; i < outputArity; i++) {
compareFiles("data" + File.separator + "device0" + File.separator + inputFileName[i + 1], outputFile[i].getAbsolutePath());
}
}
Aggregations