use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.
the class PushRuntimeTest method scanSortGbySelectWrite.
/**
 * End-to-end check of a scan / sort / group-by / select / write job running on NC1.
 * <p>
 * The job scans the '|'-delimited TPC-H customer table, sorts it on field 3 (the nation id),
 * feeds the sorted stream to a preclustered group-by whose nested plan counts the tuples of each
 * group, keeps the tuples whose first field equals 3 and writes the remaining projected field to
 * a file. The file is expected to contain exactly {@code "9"}; it is deleted afterwards.
 */
@Test
public void scanSortGbySelectWrite() throws Exception {
    final JobSpecification job = new JobSpecification(FRAME_SIZE);
    final String nc1 = AlgebricksHyracksIntegrationUtil.NC1_ID;

    // ---- scanner over customer.tbl ----
    FileSplit[] customerSplits = {
            new ManagedFileSplit(nc1, "data" + File.separator + "tpch0.001" + File.separator + "customer.tbl") };
    IFileSplitProvider customerSplitProvider = new ConstantFileSplitProvider(customerSplits);
    ISerializerDeserializer[] customerSerdes = {
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(),
            IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            FloatSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() };
    RecordDescriptor customerDesc = new RecordDescriptor(customerSerdes);
    IValueParserFactory[] customerParsers = {
            IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            IntegerParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            FloatParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE,
            UTF8StringParserFactory.INSTANCE };
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(job, customerSplitProvider,
            new DelimitedDataTupleParserFactory(customerParsers, '|'), customerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, scanOp, new String[] { nc1 });

    // ---- in-memory sort on the nation id (field 3); the record layout is unchanged ----
    IBinaryComparatorFactory[] sortComparators = { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    InMemorySortOperatorDescriptor sortOp =
            new InMemorySortOperatorDescriptor(job, new int[] { 3 }, sortComparators, customerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, sortOp, new String[] { nc1 });

    // ---- preclustered group-by on field 3 with a nested "count tuples" plan ----
    NestedTupleSourceRuntimeFactory nestedSource = new NestedTupleSourceRuntimeFactory();
    AggregateRuntimeFactory countAggregate = new AggregateRuntimeFactory(
            new IAggregateEvaluatorFactory[] { new TupleCountAggregateFunctionFactory() });
    RecordDescriptor countDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    AlgebricksPipeline countPipeline = new AlgebricksPipeline(
            new IPushRuntimeFactory[] { nestedSource, countAggregate },
            new RecordDescriptor[] { customerDesc, countDesc });
    NestedPlansAccumulatingAggregatorFactory nestedPlanAggregator = new NestedPlansAccumulatingAggregatorFactory(
            new AlgebricksPipeline[] { countPipeline }, new int[] { 3 }, new int[] {});
    RecordDescriptor groupByDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    IBinaryComparatorFactory[] groupComparators = { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    PreclusteredGroupOperatorDescriptor groupByOp = new PreclusteredGroupOperatorDescriptor(job, new int[] { 3 },
            groupComparators, nestedPlanAggregator, groupByDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, groupByOp, new String[] { nc1 });

    // ---- algebricks meta operator: select followed by the file writer ----
    // The constant 3 picks out the Canadian customers.
    IScalarEvaluatorFactory isCanada =
            new IntegerEqualsEvalFactory(new IntegerConstantEvalFactory(3), new TupleFieldEvaluatorFactory(0));
    StreamSelectRuntimeFactory selectRuntime = new StreamSelectRuntimeFactory(isCanada, new int[] { 1 },
            BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectedDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    File resultFile = new File(PATH_ACTUAL + SEPARATOR + "scanSortGbySelectWrite.out");
    SinkWriterRuntimeFactory writerRuntime = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, resultFile,
            PrinterBasedWriterFactory.INSTANCE, selectedDesc);
    AlgebricksMetaOperatorDescriptor selectAndWriteOp = new AlgebricksMetaOperatorDescriptor(job, 1, 0,
            new IPushRuntimeFactory[] { selectRuntime, writerRuntime },
            new RecordDescriptor[] { selectedDesc, null });
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, selectAndWriteOp, new String[] { nc1 });

    // ---- wire scan -> sort -> group-by -> select/write and run ----
    job.connect(new OneToOneConnectorDescriptor(job), scanOp, 0, sortOp, 0);
    job.connect(new OneToOneConnectorDescriptor(job), sortOp, 0, groupByOp, 0);
    job.connect(new OneToOneConnectorDescriptor(job), groupByOp, 0, selectAndWriteOp, 0);
    job.addRoot(selectAndWriteOp);
    AlgebricksHyracksIntegrationUtil.runJob(job);

    // ---- verify the written result, then clean up ----
    StringBuilder written = new StringBuilder();
    readFileToString(resultFile, written);
    Assert.assertEquals("9", written.toString());
    resultFile.delete();
}
use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.
the class PushRuntimeTest method etsAssignSubplanProjectWrite.
/**
 * Runs a single meta operator made of: empty tuple source, assign, subplan, project and writer.
 * <p>
 * The outer assign produces the constant 400; the subplan's nested pipeline adds the constant 3
 * to field 0 and projects the sum. After the final projection the writer's output file is
 * expected to contain exactly {@code "403"}; it is deleted afterwards.
 */
@Test
public void etsAssignSubplanProjectWrite() throws Exception {
    final JobSpecification job = new JobSpecification(FRAME_SIZE);

    IntegerConstantEvalFactory fourHundred = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory three = new IntegerConstantEvalFactory(3);

    // Head of the outer pipeline: empty tuple source, then an assign of the constant 400.
    EmptyTupleSourceRuntimeFactory emptySource = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor emptySourceDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    AssignRuntimeFactory outerAssign = new AssignRuntimeFactory(new int[] { 0 },
            new IScalarEvaluatorFactory[] { fourHundred }, new int[] { 0 });
    RecordDescriptor outerAssignDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });

    // Nested pipeline of the subplan: nested source -> assign (field 0 + 3) -> project field 1.
    NestedTupleSourceRuntimeFactory nestedSource = new NestedTupleSourceRuntimeFactory();
    IScalarEvaluatorFactory fieldZeroPlusThree = new IntegerAddEvalFactory(new TupleFieldEvaluatorFactory(0), three);
    AssignRuntimeFactory nestedAssign = new AssignRuntimeFactory(new int[] { 1 },
            new IScalarEvaluatorFactory[] { fieldZeroPlusThree }, new int[] { 0, 1 });
    RecordDescriptor nestedAssignDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    StreamProjectRuntimeFactory nestedProject = new StreamProjectRuntimeFactory(new int[] { 1 });
    RecordDescriptor nestedProjectDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    AlgebricksPipeline nestedPipeline = new AlgebricksPipeline(
            new IPushRuntimeFactory[] { nestedSource, nestedAssign, nestedProject },
            new RecordDescriptor[] { outerAssignDesc, nestedAssignDesc, nestedProjectDesc });

    // Subplan wrapping the nested pipeline, followed by the outer projection of field 1.
    SubplanRuntimeFactory subplanRuntime = new SubplanRuntimeFactory(nestedPipeline,
            new IMissingWriterFactory[] { NoopMissingWriterFactory.INSTANCE }, outerAssignDesc, null);
    RecordDescriptor subplanOutDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    StreamProjectRuntimeFactory outerProject = new StreamProjectRuntimeFactory(new int[] { 1 });
    RecordDescriptor outerProjectDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });

    // Writer that prints field 0 of the projected tuple into the result file.
    File resultFile = new File(PATH_ACTUAL + SEPARATOR + "etsAssignSubplanProjectWrite.out");
    SinkWriterRuntimeFactory writerRuntime = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, resultFile,
            PrinterBasedWriterFactory.INSTANCE, outerProjectDesc);

    // Assemble everything into one meta operator, which is also the job root.
    IPushRuntimeFactory[] runtimeChain = { emptySource, outerAssign, subplanRuntime, outerProject, writerRuntime };
    RecordDescriptor[] chainDescs = { emptySourceDesc, outerAssignDesc, subplanOutDesc, outerProjectDesc, null };
    AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(job, 0, 0, runtimeChain, chainDescs);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, metaOp, DEFAULT_NODES);
    job.addRoot(metaOp);
    AlgebricksHyracksIntegrationUtil.runJob(job);

    // Verify the written result, then clean up.
    StringBuilder written = new StringBuilder();
    readFileToString(resultFile, written);
    Assert.assertEquals("403", written.toString());
    resultFile.delete();
}
use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.
the class PushRuntimeTest method etsAssignProjectWrite.
/**
 * Runs a single meta operator made of: empty tuple source, assign, project and writer.
 * <p>
 * The assign produces the constants 400 and 3 in fields 0 and 1; the projection keeps field 1,
 * so the writer's output file is expected to contain exactly {@code "3"}. The file is deleted
 * afterwards.
 */
@Test
public void etsAssignProjectWrite() throws Exception {
    final JobSpecification job = new JobSpecification(FRAME_SIZE);

    IntegerConstantEvalFactory fourHundred = new IntegerConstantEvalFactory(400);
    IntegerConstantEvalFactory three = new IntegerConstantEvalFactory(3);

    // Empty tuple source feeding an assign that evaluates both constants.
    EmptyTupleSourceRuntimeFactory emptySource = new EmptyTupleSourceRuntimeFactory();
    RecordDescriptor emptySourceDesc = new RecordDescriptor(new ISerializerDeserializer[] {});
    AssignRuntimeFactory assignRuntime = new AssignRuntimeFactory(new int[] { 0, 1 },
            new IScalarEvaluatorFactory[] { fourHundred, three }, new int[] { 0, 1 });
    RecordDescriptor assignedDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });

    // Keep only field 1 of the assigned tuple.
    StreamProjectRuntimeFactory projectRuntime = new StreamProjectRuntimeFactory(new int[] { 1 });
    RecordDescriptor projectedDesc =
            new RecordDescriptor(new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });

    // Writer that prints field 0 of the projected tuple into the result file.
    File resultFile = new File(PATH_ACTUAL + SEPARATOR + "etsAssignProjectWrite.out");
    SinkWriterRuntimeFactory writerRuntime = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, resultFile,
            PrinterBasedWriterFactory.INSTANCE, projectedDesc);

    // Assemble everything into one meta operator, which is also the job root.
    IPushRuntimeFactory[] runtimeChain = { emptySource, assignRuntime, projectRuntime, writerRuntime };
    RecordDescriptor[] chainDescs = { emptySourceDesc, assignedDesc, projectedDesc, null };
    AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(job, 0, 0, runtimeChain, chainDescs);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(job, metaOp, DEFAULT_NODES);
    job.addRoot(metaOp);
    AlgebricksHyracksIntegrationUtil.runJob(job);

    // Verify the written result, then clean up.
    StringBuilder written = new StringBuilder();
    readFileToString(resultFile, written);
    Assert.assertEquals("3", written.toString());
    resultFile.delete();
}
use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.
the class SecondaryBTreeOperationsHelper method buildLoadingJobSpec.
/**
 * Builds the job specification that bulk-loads this secondary BTree index.
 * <p>
 * Two pipelines are produced depending on the dataset type:
 * <ul>
 * <li>EXTERNAL dataset: external indexing scan, optional cast, assign, optional null-filtering
 * select, sort, external bulk load (or bulk modify when {@code externalFiles} is set), sink.</li>
 * <li>any other dataset type: dummy key provider, primary index scan, optional cast, assign,
 * optional null-filtering select, sort, tree index bulk load, sink.</li>
 * </ul>
 * In both cases the sink meta operator is registered as the job root and a
 * {@code ConnectorPolicyAssignmentPolicy} is installed on the specification.
 *
 * @return the assembled job specification
 * @throws AlgebricksException declared by this method; presumably propagated from the helper
 *             calls that create the operators (not visible here)
 */
@Override
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
// The bulk-load field permutation is sized from the number of secondary key fields.
int[] fieldPermutation = createFieldPermutationForBulkLoadOp(index.getKeyFieldNames().size());
IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
/*
 * In case of external data,
 * this method is used to build loading jobs for both the initial load on index creation
 * and the transaction load on dataset refresh.
 */
// Create external indexing scan operator
ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
// Assign op.
AbstractOperatorDescriptor sourceOp = primaryScanOp;
// A cast op is placed after the scan only when key types are enforced and the enforced
// item type differs from the dataset's item type; it then becomes the assign op's source.
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
// The select op is also added whenever key types are enforced; otherwise it stays null.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
// Create secondary BTree bulk load op.
AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
IOperatorDescriptor root;
if (externalFiles != null) {
// Transaction load
secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
} else {
// Initial load
secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
}
// Meta operator holding only a sink runtime; it follows the bulk load op and is the job root.
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
root = metaOp;
// Connect the remaining operators: source -> assign -> [select] -> sort -> bulk load.
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
// Same condition as the one that created selectOp above, so selectOp is non-null here.
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.addRoot(root);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
} else {
// Create dummy key provider for feeding the primary index scan.
IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
// Bind a job event listener to the spec; the returned job id is passed to the primary scan.
JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
// Create primary index scan op.
IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
// Assign op.
IOperatorDescriptor sourceOp = primaryScanOp;
// A cast op is placed after the scan only when key types are enforced and the enforced
// item type differs from the dataset's item type; it then becomes the assign op's source.
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
// The select op is also added whenever key types are enforced; otherwise it stays null.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, index.getKeyFieldNames().size(), secondaryRecDesc);
}
// Sort by secondary keys.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
// Create secondary BTree bulk load op.
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
// Meta operator holding only a sink runtime; it follows the bulk load op and is the job root.
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
// Connect the operators.
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
// Same condition as the one that created selectOp above, so selectOp is non-null here.
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
spec.addRoot(metaOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
}
}
use of org.apache.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor in project asterixdb by apache.
the class SecondaryRTreeOperationsHelper method buildLoadingJobSpec.
/**
 * Builds the job specification that bulk-loads this secondary RTree index.
 * <p>
 * Two pipelines are produced depending on the dataset type:
 * <ul>
 * <li>INTERNAL dataset: dummy key provider, primary index scan, optional cast, assign, optional
 * null-filtering select, sort, tree index bulk load, sink.</li>
 * <li>any other dataset type (treated as external): external indexing scan, optional cast,
 * assign, optional null-filtering select, sort, external bulk load (or bulk modify when
 * {@code externalFiles} is set), sink.</li>
 * </ul>
 * When {@code isPointMBR} is set, the assign, select and sort operators work on the point record
 * descriptor and on half the number of nested secondary key fields. In both pipelines the sort
 * uses a single comparator obtained from {@code MetadataProvider.proposeLinearizer}, the sink
 * meta operator is the job root, and a {@code ConnectorPolicyAssignmentPolicy} is installed.
 *
 * @return the assembled job specification
 * @throws AsterixException declared by this method; the throwing call is not visible here
 * @throws AlgebricksException declared by this method; presumably propagated from the helper
 *             calls that create the operators (not visible here)
 */
@Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
/***************************************************
 * [ About PointMBR Optimization ]
 * Instead of storing an MBR (4 doubles) for a point (2 doubles) in an RTree leaf node,
 * the PointMBR concept is introduced.
 * PointMBR is a way to store a point as 2 doubles in an RTree leaf node.
 * This reduces the RTree index size roughly by half.
 * In order to fully benefit from the PointMBR concept, besides the RTree itself,
 * the external sort operator used during bulk-loading (from either data loading or index creation)
 * must deal with a point as 2 doubles instead of 4 doubles. Otherwise, the external sort would
 * handle twice as many doubles as it actually requires. For this purpose,
 * PointMBR-specific optimization logic is added as follows:
 * 1) The CreateMBR function in the assign operator generates 2 doubles instead of 4 doubles.
 * 2) The external sort operator sorts points represented with 2 doubles.
 * 3) Bulk-loading in the RTree takes 4 doubles by reading the 2 doubles twice and then
 * does the same work as in the non-point MBR case.
 ***************************************************/
JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
// The bulk-load field permutation always uses the full number of nested secondary key fields.
int[] fieldPermutation = createFieldPermutationForBulkLoadOp(numNestedSecondaryKeyFields);
// For point MBRs the assign/select operators see half as many key fields and the point-specific
// record descriptor.
int numNestedSecondaryKeFieldsConsideringPointMBR = isPointMBR ? numNestedSecondaryKeyFields / 2 : numNestedSecondaryKeyFields;
RecordDescriptor secondaryRecDescConsideringPointMBR = isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc;
boolean isEnforcingKeyTypes = index.isEnforcingKeyFileds();
IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
if (dataset.getDatasetType() == DatasetType.INTERNAL) {
// Create dummy key provider for feeding the primary index scan.
IOperatorDescriptor keyProviderOp = DatasetUtil.createDummyKeyProviderOp(spec, dataset, metadataProvider);
// Bind a job event listener to the spec; the returned job id is passed to the primary scan.
JobId jobId = IndexUtil.bindJobEventListener(spec, metadataProvider);
// Create primary index scan op.
IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, dataset, jobId);
// Assign op.
IOperatorDescriptor sourceOp = primaryScanOp;
// A cast op is placed after the scan only when key types are enforced and the enforced
// item type differs from the dataset's item type; it then becomes the assign op's source.
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
// The select op is also added whenever key types are enforced; otherwise it stays null.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
}
// Sort by secondary keys.
// The record descriptor argument is the same choice as secondaryRecDescConsideringPointMBR.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
// Create secondary RTree bulk load op.
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
// Meta operator holding only a sink runtime; it follows the bulk load op and is the job root.
// NOTE(review): this passes an empty RecordDescriptor array, whereas the external branch of
// this method passes { secondaryRecDesc } -- confirm the difference is intentional.
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] {});
// Connect the operators.
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
// Same condition as the one that created selectOp above, so selectOp is non-null here.
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
spec.addRoot(metaOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
} else {
// External dataset
/*
 * In case of external data, this method is used to build loading jobs for both
 * the initial load on index creation
 * and the transaction load on dataset refresh.
 */
// Create external indexing scan operator
ExternalScanOperatorDescriptor primaryScanOp = createExternalIndexingOp(spec);
AbstractOperatorDescriptor sourceOp = primaryScanOp;
// A cast op is placed after the scan only when key types are enforced and the enforced
// item type differs from the dataset's item type; it then becomes the assign op's source.
if (isEnforcingKeyTypes && !enforcedItemType.equals(itemType)) {
sourceOp = createCastOp(spec, dataset.getDatasetType());
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
// Assign op.
AlgebricksMetaOperatorDescriptor asterixAssignOp = createExternalAssignOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
// If any of the secondary fields are nullable, then add a select op that filters nulls.
// The select op is also added whenever key types are enforced; otherwise it stays null.
AlgebricksMetaOperatorDescriptor selectOp = null;
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
selectOp = createFilterNullsSelectOp(spec, numNestedSecondaryKeFieldsConsideringPointMBR, secondaryRecDescConsideringPointMBR);
}
// Sort by secondary keys.
// The record descriptor argument is the same choice as secondaryRecDescConsideringPointMBR.
ExternalSortOperatorDescriptor sortOp = createSortOp(spec, new IBinaryComparatorFactory[] { MetadataProvider.proposeLinearizer(keyType, secondaryComparatorFactories.length) }, isPointMBR ? secondaryRecDescForPointMBR : secondaryRecDesc);
// Create secondary RTree bulk load op.
IOperatorDescriptor root;
AbstractSingleActivityOperatorDescriptor secondaryBulkLoadOp;
if (externalFiles != null) {
// Transaction load
secondaryBulkLoadOp = createExternalIndexBulkModifyOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
} else {
// Initial load
secondaryBulkLoadOp = createExternalIndexBulkLoadOp(spec, fieldPermutation, indexDataflowHelperFactory, GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
}
// Meta operator holding only a sink runtime; it follows the bulk load op and is the job root.
AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc });
spec.connect(new OneToOneConnectorDescriptor(spec), secondaryBulkLoadOp, 0, metaOp, 0);
root = metaOp;
// Connect the remaining operators: source -> assign -> [select] -> sort -> bulk load.
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
// Same condition as the one that created selectOp above, so selectOp is non-null here.
if (anySecondaryKeyIsNullable || isEnforcingKeyTypes) {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
} else {
spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
spec.addRoot(root);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
}
return spec;
}
Aggregations