Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
From the class AggregationTest, the method singleKeyAvgExtGroupTest:
@Test
public void singleKeyAvgExtGroupTest() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileScanOperatorDescriptor csvScanner =
            new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory, desc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
    RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
    int[] keyFields = new int[] { 0 };
    int frameLimits = 5;
    int tableSize = 8;
    long fileSize = frameLimits * spec.getFrameSize();
    ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, tableSize, fileSize,
            keyFields, frameLimits,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory(),
            // Group (partial) phase: sum of field 1, row count, running average of field 1.
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
                    new AvgFieldGroupAggregatorFactory(1, false) }),
            // Merge phase: re-sum the partial sums (field 1) and counts (field 2),
            // and merge the partial averages (field 3).
            new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                    new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false),
                    new AvgFieldMergeAggregatorFactory(3, false) }),
            outputRec, outputRec, new HashSpillableTableFactory(
                    new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
    // Hash-partition the scanned tuples on the grouping key before they reach the grouper.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, csvScanner, 0, grouper, 0);
    AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, grouper, 0, printer, 0);
    spec.addRoot(printer);
    runTest(spec);
}
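A note on why the merge-phase aggregators differ from the group-phase ones: a global average cannot be formed by averaging partial averages, so the merge phase re-sums the partial sums (field 1) and partial counts (field 2) while recombining the averages (field 3). The following standalone sketch (plain Java, independent of Hyracks; the values are made up for illustration) shows the recombination:

    // Standalone illustration of merging partial (sum, count) pairs into a
    // global average; this mirrors what the merge-phase aggregators compute.
    long[] partialSums = { 10, 32 };   // per-partition sums of the aggregated field
    long[] partialCounts = { 4, 8 };   // per-partition row counts
    long totalSum = 0;
    long totalCount = 0;
    for (int i = 0; i < partialSums.length; i++) {
        totalSum += partialSums[i];
        totalCount += partialCounts[i];
    }
    float globalAvg = (float) totalSum / totalCount; // 42 / 12 = 3.5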
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
From the class TestNodeController, the method getFullScanPipeline:
public IPushRuntime getFullScanPipeline(IFrameWriter countOp, IHyracksTaskContext ctx, Dataset dataset,
        IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType,
        NoMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields,
        int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators,
        StorageComponentProvider storageComponentProvider) throws HyracksDataException, AlgebricksException {
    IPushRuntime emptyTupleOp = new EmptyTupleSourceRuntimeFactory().createPushRuntime(ctx);
    JobSpecification spec = new JobSpecification();
    PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType,
            mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators,
            storageComponentProvider);
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), primaryIndexInfo.fileSplitProvider);
    BTreeSearchOperatorDescriptor searchOpDesc = new BTreeSearchOperatorDescriptor(spec, primaryIndexInfo.rDesc,
            null, null, true, true, indexDataflowHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, filterFields, filterFields, false);
    BTreeSearchOperatorNodePushable searchOp = (BTreeSearchOperatorNodePushable) searchOpDesc
            .createPushRuntime(ctx, primaryIndexInfo.getSearchRecordDescriptorProvider(), PARTITION, 1);
    // Wire: empty-tuple source -> B-tree search -> caller-supplied sink (countOp).
    emptyTupleOp.setFrameWriter(0, searchOp,
            primaryIndexInfo.getSearchRecordDescriptorProvider().getInputRecordDescriptor(null, 0));
    searchOp.setOutputFrameWriter(0, countOp, primaryIndexInfo.rDesc);
    return emptyTupleOp;
}
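A hedged sketch of how the returned pipeline might be driven. IPushRuntime extends IFrameWriter, so the pipeline is exercised through open/close; the variable names nc and countOp, and the argument values, are assumptions standing in for the surrounding test setup:

    // Opening and then closing the pipeline emits the single trigger tuple
    // from the empty-tuple source, which drives the B-tree scan and pushes
    // the resulting frames into countOp. `nc` is a TestNodeController and the
    // remaining arguments are assumed to be prepared by the test framework.
    IPushRuntime scanPipeline = nc.getFullScanPipeline(countOp, ctx, dataset, primaryKeyTypes, recordType,
            metaType, mergePolicyFactory, mergePolicyProperties, null, primaryKeyIndexes, primaryKeyIndicators,
            storageComponentProvider);
    scanPipeline.open();
    scanPipeline.close();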
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
From the class RebalanceUtil, the method populateDataToRebalanceTarget:
// Populates the data from the source dataset to the rebalance target dataset.
private static void populateDataToRebalanceTarget(Dataset source, Dataset target,
        MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    JobId jobId = JobIdFactory.generateJobId();
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(jobId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
    // Creates the primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source, jobId);
    // Creates the primary index upsert op for the target dataset.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, jobId, target);
    // Connects the starter and the scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
    // Connects the scan and the upsert, hash-partitioning on the target's primary keys.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
    // Connects the upsert and the commit.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);
    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
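The keys array above is simply the leading field indexes of the scan output, which matches the primary-index scan layout where the key fields come first. A standalone check of that computation (plain Java; numKeys = 2 is an assumed example value):

    import java.util.Arrays;
    import java.util.stream.IntStream;

    public class KeysDemo {
        public static void main(String[] args) {
            int numKeys = 2; // e.g. a two-field primary key
            int[] keys = IntStream.range(0, numKeys).toArray();
            System.out.println(Arrays.toString(keys)); // prints [0, 1]
        }
    }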
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
From the class DatasetUtil, the method dropDatasetJobSpec:
public static JobSpecification dropDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException, HyracksDataException, RemoteException, ACIDException {
    LOGGER.info("DROP DATASET: " + dataset);
    // External datasets: nothing to drop in this job, so an empty spec is returned.
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    }
    JobSpecification specPrimary = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop,
            splitsAndConstraint.second);
    specPrimary.addRoot(primaryBtreeDrop);
    return specPrimary;
}
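The returned specification is an ordinary Hyracks job, so a caller holding a client connection can execute it the same way the rebalance code above does. A minimal sketch, assuming dataset, metadataProvider, and an IHyracksClientConnection hcc are in scope:

    // Build the drop job and run it synchronously (the same JobUtils.runJob
    // call used by populateDataToRebalanceTarget above).
    JobSpecification dropSpec = DatasetUtil.dropDatasetJobSpec(dataset, metadataProvider);
    JobUtils.runJob(hcc, dropSpec, true);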
Use of org.apache.hyracks.api.job.JobSpecification in project asterixdb by apache.
From the class ExternalIndexingOperations, the method buildAbortOp:
public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            // Not a regular secondary index name: fall back to the dataset's
            // external-files index splits.
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesAbortOperatorDescriptor op =
            new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
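Note that constraints is assigned only in the valid-index branch, so the method effectively assumes at least one index in the list has a regular index name; otherwise setPartitionConstraintInJobSpec receives null. Executing the abort job follows the same client-connection pattern as the other specs on this page; a minimal sketch, assuming ds, indexes, metadataProvider, and hcc are in scope:

    // Build the abort job over the dataset's external indexes and run it
    // synchronously via the client connection.
    JobSpecification abortSpec = ExternalIndexingOperations.buildAbortOp(ds, indexes, metadataProvider);
    JobUtils.runJob(hcc, abortSpec, true);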