Use of org.apache.hyracks.dataflow.std.file.IFileSplitProvider in project asterixdb by apache.
From the class AbstractBTreeOperatorTest, method insertPipeline:
protected void insertPipeline(boolean useUpsert) throws Exception {
    IndexOperation pipelineOperation = useUpsert ? IndexOperation.UPSERT : IndexOperation.INSERT;
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(NC1_ID,
            "data" + File.separator + "tpch0.002" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = inputRecordDesc;
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(inputParserFactories, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
    // insert into primary index
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryBtreeInsertOp =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, primaryFieldPermutation,
                    pipelineOperation, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeInsertOp, NC1_ID);
    // first secondary index
    int[] fieldPermutationB = secondaryFieldPermutationB;
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsertOp =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, fieldPermutationB,
                    pipelineOperation, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryInsertOp, NC1_ID);
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nullSink, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, primaryBtreeInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryBtreeInsertOp, 0, secondaryInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsertOp, 0, nullSink, 0);
    spec.addRoot(nullSink);
    runTest(spec);
}
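For context, IFileSplitProvider is a very small contract: an operator asks it for the FileSplits it should read or write. Below is a minimal sketch of a provider equivalent to the ConstantFileSplitProvider used above, assuming the interface exposes a single getFileSplits() method as its usage here suggests; the class name FixedFileSplitProvider is illustrative and the import locations are assumptions based on current Hyracks packaging.

import org.apache.hyracks.api.io.FileSplit;
import org.apache.hyracks.dataflow.std.file.IFileSplitProvider;

// Sketch: a provider that hands back a fixed array of splits, which is what
// ConstantFileSplitProvider does for the orders-part2.tbl split above.
public class FixedFileSplitProvider implements IFileSplitProvider {
    private static final long serialVersionUID = 1L;
    private final FileSplit[] splits;

    public FixedFileSplitProvider(FileSplit... splits) {
        this.splits = splits;
    }

    @Override
    public FileSplit[] getFileSplits() {
        return splits;
    }
}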
Use of org.apache.hyracks.dataflow.std.file.IFileSplitProvider in project asterixdb by apache.
From the class DatasetUtil, method dropDatasetJobSpec:
public static JobSpecification dropDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException, HyracksDataException, RemoteException, ACIDException {
    LOGGER.info("DROP DATASET: " + dataset);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    }
    JobSpecification specPrimary = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop,
            splitsAndConstraint.second);
    specPrimary.addRoot(primaryBtreeDrop);
    return specPrimary;
}
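A hedged sketch of how the returned job would typically be submitted; dataset, metadataProvider, and hcc (an org.apache.hyracks.api.client.IHyracksClientConnection) are assumed to be available from the caller's context.

// Sketch: build the drop job and run it synchronously against the cluster controller.
JobSpecification dropSpec = DatasetUtil.dropDatasetJobSpec(dataset, metadataProvider);
JobId jobId = hcc.startJob(dropSpec);
hcc.waitForCompletion(jobId);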
Use of org.apache.hyracks.dataflow.std.file.IFileSplitProvider in project asterixdb by apache.
From the class DatasetUtil, method createDummyKeyProviderOp:
/**
 * Creates a dummy key provider operator for the primary index scan.
 *
 * @param spec the job specification.
 * @param dataset the dataset to scan.
 * @param metadataProvider the metadata provider.
 * @return a dummy key provider operator.
 * @throws AlgebricksException
 */
public static IOperatorDescriptor createDummyKeyProviderOp(JobSpecification spec, Dataset dataset,
        MetadataProvider metadataProvider) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    // Build dummy tuple containing one field with a dummy value inside.
    ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    try {
        // Serialize dummy value into a field.
        IntegerSerializerDeserializer.INSTANCE.serialize(0, dos);
    } catch (HyracksDataException e) {
        throw new AsterixException(e);
    }
    // Add dummy field.
    tb.addFieldEndOffset();
    ISerializerDeserializer[] keyRecDescSers = { IntegerSerializerDeserializer.INSTANCE };
    RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
    ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
            keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, keyProviderOp,
            primaryPartitionConstraint);
    return keyProviderOp;
}
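The ArrayTupleBuilder idiom above (reset, serialize each field into the shared DataOutput, then mark the field boundary with addFieldEndOffset) generalizes to any number of fields. A small sketch follows; the helper name buildTwoIntTuple is an illustrative assumption, not part of the project.

import java.io.DataOutput;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;

// Sketch: build a two-field integer tuple; the builder's byte array and field-end
// offsets can then feed a ConstantTupleSourceOperatorDescriptor as shown above.
static ArrayTupleBuilder buildTwoIntTuple(int first, int second) throws HyracksDataException {
    ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
    DataOutput dos = tb.getDataOutput();
    tb.reset();
    IntegerSerializerDeserializer.INSTANCE.serialize(first, dos);
    tb.addFieldEndOffset();
    IntegerSerializerDeserializer.INSTANCE.serialize(second, dos);
    tb.addFieldEndOffset();
    return tb;
}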
Use of org.apache.hyracks.dataflow.std.file.IFileSplitProvider in project asterixdb by apache.
From the class ExternalIndexingOperations, method buildAbortOp:
public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesAbortOperatorDescriptor op =
            new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
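Note that constraints stays null unless at least one index name passes isValidIndexName, yet it is handed to setPartitionConstraintInJobSpec unconditionally. A defensive variant of the else branch (a sketch, not the project's actual code) would also capture the files-index constraint:

// Sketch: capture the constraint when falling back to the files index too,
// so setPartitionConstraintInJobSpec never receives null.
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> fallback =
        metadataProvider.getSplitProviderAndConstraints(ds,
                IndexingConstants.getFilesIndexName(ds.getDatasetName()));
indexSplitProvider = fallback.first;
if (constraints == null) {
    constraints = fallback.second;
}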
Use of org.apache.hyracks.dataflow.std.file.IFileSplitProvider in project asterixdb by apache.
From the class PushRuntimeTest, method scanSelectWrite:
@Test
public void scanSelectWrite() throws Exception {
    JobSpecification spec = new JobSpecification(FRAME_SIZE);
    // the scanner
    FileSplit[] intFileSplits = new FileSplit[1];
    intFileSplits[0] = new ManagedFileSplit(AlgebricksHyracksIntegrationUtil.NC1_ID,
            "data" + File.separator + "simple" + File.separator + "int-part1.tbl");
    IFileSplitProvider intSplitProvider = new ConstantFileSplitProvider(intFileSplits);
    RecordDescriptor intScannerDesc = new RecordDescriptor(
            new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE });
    IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE };
    FileScanOperatorDescriptor intScanner = new FileScanOperatorDescriptor(spec, intSplitProvider,
            new DelimitedDataTupleParserFactory(valueParsers, '|'), intScannerDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, intScanner, DEFAULT_NODES);
    // the algebricks op.
    IScalarEvaluatorFactory cond = new IntegerGreaterThanEvalFactory(new IntegerConstantEvalFactory(2),
            new TupleFieldEvaluatorFactory(0));
    StreamSelectRuntimeFactory select = new StreamSelectRuntimeFactory(cond, new int[] { 0 },
            BinaryBooleanInspectorImpl.FACTORY, false, -1, null);
    RecordDescriptor selectDesc = intScannerDesc;
    String filePath = PATH_ACTUAL + SEPARATOR + "scanSelectWrite.out";
    File outFile = new File(filePath);
    SinkWriterRuntimeFactory writer = new SinkWriterRuntimeFactory(new int[] { 0 },
            new IPrinterFactory[] { IntegerPrinterFactory.INSTANCE }, outFile, PrinterBasedWriterFactory.INSTANCE,
            selectDesc);
    AlgebricksMetaOperatorDescriptor algebricksOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0,
            new IPushRuntimeFactory[] { select, writer }, new RecordDescriptor[] { selectDesc, null });
    PartitionConstraintHelper.addPartitionCountConstraint(spec, algebricksOp, 1);
    spec.connect(new OneToOneConnectorDescriptor(spec), intScanner, 0, algebricksOp, 0);
    spec.addRoot(algebricksOp);
    AlgebricksHyracksIntegrationUtil.runJob(spec);
    StringBuilder buf = new StringBuilder();
    readFileToString(outFile, buf);
    Assert.assertEquals("0", buf.toString());
    outFile.delete();
}
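readFileToString is a private helper of PushRuntimeTest; a hedged sketch of an equivalent follows, with the signature below being an assumption inferred from how the test calls it.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

// Sketch: append every line of the output file to the buffer, without separators,
// so a single-line result such as "0" compares cleanly in the assertion above.
static void readFileToString(File file, StringBuilder buf) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = reader.readLine()) != null) {
            buf.append(line);
        }
    }
}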