Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class LangExpressionToPlanTranslator, method translate.
public ILogicalPlan translate(Query expr, String outputDatasetName, ICompiledDmlStatement stmt,
        ILogicalOperator baseOp) throws AlgebricksException {
    MutableObject<ILogicalOperator> base = new MutableObject<>(new EmptyTupleSourceOperator());
    if (baseOp != null) {
        base = new MutableObject<>(baseOp);
    }
    Pair<ILogicalOperator, LogicalVariable> p = expr.accept(this, base);
    ArrayList<Mutable<ILogicalOperator>> globalPlanRoots = new ArrayList<>();
    ILogicalOperator topOp = p.first;
    List<LogicalVariable> liveVars = new ArrayList<>();
    VariableUtilities.getLiveVariables(topOp, liveVars);
    LogicalVariable unnestVar = liveVars.get(0);
    LogicalVariable resVar = unnestVar;
    if (outputDatasetName == null) {
        FileSplit outputFileSplit = metadataProvider.getOutputFile();
        if (outputFileSplit == null) {
            outputFileSplit = getDefaultOutputFileLocation(metadataProvider.getApplicationContext());
        }
        metadataProvider.setOutputFile(outputFileSplit);
        List<Mutable<ILogicalExpression>> writeExprList = new ArrayList<>(1);
        writeExprList.add(new MutableObject<>(new VariableReferenceExpression(resVar)));
        ResultSetSinkId rssId = new ResultSetSinkId(metadataProvider.getResultSetId());
        ResultSetDataSink sink = new ResultSetDataSink(rssId, null);
        DistributeResultOperator newTop = new DistributeResultOperator(writeExprList, sink);
        newTop.getInputs().add(new MutableObject<>(topOp));
        topOp = newTop;
        // Retrieve the output record type (if any) and store it on the DistributeResultOperator.
        IAType outputRecordType = metadataProvider.findOutputRecordType();
        if (outputRecordType != null) {
            topOp.getAnnotations().put("output-record-type", outputRecordType);
        }
    } else {
        /*
         * Add the collection-to-sequence function right before the project,
         * because a dataset only accepts non-collection records.
         */
        LogicalVariable seqVar = context.newVar();
        /*
         * This assign adds a marker function collection-to-sequence: if the input is a
         * singleton collection, unnest it; otherwise do nothing.
         */
        AssignOperator assignCollectionToSequence = new AssignOperator(seqVar,
                new MutableObject<>(new ScalarFunctionCallExpression(
                        FunctionUtil.getFunctionInfo(BuiltinFunctions.COLLECTION_TO_SEQUENCE),
                        new MutableObject<>(new VariableReferenceExpression(resVar)))));
        assignCollectionToSequence.getInputs().add(new MutableObject<>(topOp.getInputs().get(0).getValue()));
        topOp.getInputs().get(0).setValue(assignCollectionToSequence);
        ProjectOperator projectOperator = (ProjectOperator) topOp;
        projectOperator.getVariables().set(0, seqVar);
        resVar = seqVar;
        DatasetDataSource targetDatasource =
                validateDatasetInfo(metadataProvider, stmt.getDataverseName(), stmt.getDatasetName());
        List<Integer> keySourceIndicator =
                ((InternalDatasetDetails) targetDatasource.getDataset().getDatasetDetails())
                        .getKeySourceIndicator();
        ArrayList<LogicalVariable> vars = new ArrayList<>();
        ArrayList<Mutable<ILogicalExpression>> exprs = new ArrayList<>();
        List<Mutable<ILogicalExpression>> varRefsForLoading = new ArrayList<>();
        List<List<String>> partitionKeys = targetDatasource.getDataset().getPrimaryKeys();
        int numOfPrimaryKeys = partitionKeys.size();
        for (int i = 0; i < numOfPrimaryKeys; i++) {
            if (keySourceIndicator == null || keySourceIndicator.get(i).intValue() == 0) {
                // The key comes from the record part.
                PlanTranslationUtil.prepareVarAndExpression(partitionKeys.get(i), resVar, vars, exprs,
                        varRefsForLoading, context);
            } else {
                // The key comes from the meta part.
                PlanTranslationUtil.prepareMetaKeyAccessExpression(partitionKeys.get(i), unnestVar, exprs, vars,
                        varRefsForLoading, context);
            }
        }
        AssignOperator assign = new AssignOperator(vars, exprs);
        List<String> additionalFilteringField = DatasetUtil.getFilterField(targetDatasource.getDataset());
        List<LogicalVariable> additionalFilteringVars;
        List<Mutable<ILogicalExpression>> additionalFilteringAssignExpressions;
        List<Mutable<ILogicalExpression>> additionalFilteringExpressions = null;
        AssignOperator additionalFilteringAssign = null;
        if (additionalFilteringField != null) {
            additionalFilteringVars = new ArrayList<>();
            additionalFilteringAssignExpressions = new ArrayList<>();
            additionalFilteringExpressions = new ArrayList<>();
            PlanTranslationUtil.prepareVarAndExpression(additionalFilteringField, resVar, additionalFilteringVars,
                    additionalFilteringAssignExpressions, additionalFilteringExpressions, context);
            additionalFilteringAssign =
                    new AssignOperator(additionalFilteringVars, additionalFilteringAssignExpressions);
            additionalFilteringAssign.getInputs().add(new MutableObject<>(topOp));
            assign.getInputs().add(new MutableObject<>(additionalFilteringAssign));
        } else {
            assign.getInputs().add(new MutableObject<>(topOp));
        }
        Mutable<ILogicalExpression> varRef = new MutableObject<>(new VariableReferenceExpression(resVar));
        ILogicalOperator leafOperator;
        switch (stmt.getKind()) {
            case Statement.Kind.INSERT:
                leafOperator = translateInsert(targetDatasource, varRef, varRefsForLoading,
                        additionalFilteringExpressions, assign, stmt);
                break;
            case Statement.Kind.UPSERT:
                leafOperator = translateUpsert(targetDatasource, varRef, varRefsForLoading,
                        additionalFilteringExpressions, assign, additionalFilteringField, unnestVar, topOp, exprs,
                        resVar, additionalFilteringAssign, stmt);
                break;
            case Statement.Kind.DELETE:
                leafOperator = translateDelete(targetDatasource, varRef, varRefsForLoading,
                        additionalFilteringExpressions, assign);
                break;
            case Statement.Kind.CONNECT_FEED:
                leafOperator = translateConnectFeed(targetDatasource, varRef, varRefsForLoading,
                        additionalFilteringExpressions, assign);
                break;
            case Statement.Kind.SUBSCRIBE_FEED:
                leafOperator = translateSubscribeFeed((CompiledSubscribeFeedStatement) stmt, targetDatasource,
                        unnestVar, topOp, exprs, resVar, varRefsForLoading, varRef, assign,
                        additionalFilteringField, additionalFilteringAssign, additionalFilteringExpressions);
                break;
            default:
                throw new AlgebricksException("Unsupported statement kind " + stmt.getKind());
        }
        topOp = leafOperator;
    }
    globalPlanRoots.add(new MutableObject<>(topOp));
    ILogicalPlan plan = new ALogicalPlanImpl(globalPlanRoots);
    eliminateSharedOperatorReferenceForPlan(plan);
    return plan;
}
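In this translator, the FileSplit names the node-local file that query results fall back to when no output dataset is given. A minimal stand-alone sketch of constructing such a split, assuming a node id of "nc1" and an invented relative path (this is not the actual getDefaultOutputFileLocation implementation):

import java.io.File;

import org.apache.hyracks.api.io.FileSplit;
import org.apache.hyracks.api.io.ManagedFileSplit;

public class DefaultOutputSplitSketch {
    public static void main(String[] args) {
        // A ManagedFileSplit resolves its relative path against the managed
        // I/O directory of the named node; "nc1" and the path are assumptions.
        FileSplit defaultOutput = new ManagedFileSplit("nc1", "results" + File.separator + "query-output.adm");
        System.out.println(defaultOutput.getNodeName() + ":" + defaultOutput.getPath());
    }
}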
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class AbstractRTreeOperatorTest, method insertPipeline.
protected void insertPipeline() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID, "data" + File.separator + "orders-with-locations-part2.txt") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
            DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
            DoubleSerializerDeserializer.INSTANCE });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE, DoubleParserFactory.INSTANCE,
                    DoubleParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
    // insert into primary index
    int[] primaryFieldPermutation = { 0, 1, 2, 4, 5, 7, 9, 10, 11, 12 };
    TreeIndexInsertUpdateDeleteOperatorDescriptor primaryInsertOp =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, primaryFieldPermutation,
                    IndexOperation.INSERT, primaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryInsertOp, NC1_ID);
    // insert into secondary index
    int[] secondaryFieldPermutation = { 9, 10, 11, 12, 0 };
    TreeIndexInsertUpdateDeleteOperatorDescriptor secondaryInsertOp =
            new TreeIndexInsertUpdateDeleteOperatorDescriptor(spec, ordersDesc, secondaryFieldPermutation,
                    IndexOperation.INSERT, secondaryHelperFactory, null, NoOpOperationCallbackFactory.INSTANCE);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryInsertOp, NC1_ID);
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, nullSink, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, primaryInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryInsertOp, 0, secondaryInsertOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), secondaryInsertOp, 0, nullSink, 0);
    spec.addRoot(nullSink);
    runTest(spec);
}
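The two permutation arrays decide which scanner fields feed each index: the primary permutation keeps ten of the thirteen parsed fields, while the secondary permutation places the four double fields (the R-tree keys) first, followed by field 0 as the primary key. The mapping itself, reduced to plain Java with a hypothetical permute helper (not a Hyracks API):

import java.util.Arrays;

public class FieldPermutationSketch {
    // Applies a field permutation the way the insert operators above do:
    // output[i] = input[permutation[i]].
    static Object[] permute(Object[] tuple, int[] permutation) {
        Object[] out = new Object[permutation.length];
        for (int i = 0; i < permutation.length; i++) {
            out[i] = tuple[permutation[i]];
        }
        return out;
    }

    public static void main(String[] args) {
        Object[] scanned = { "key", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", 1.0, 2.0, 3.0, 4.0 };
        // Secondary-index tuple: the four doubles, then the key field.
        System.out.println(Arrays.toString(permute(scanned, new int[] { 9, 10, 11, 12, 0 })));
    }
}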
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class SleepOperatorDescriptor, method jobWithSleepOp.
private JobSpecification jobWithSleepOp() {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] { new ManagedFileSplit(ASTERIX_IDS[0],
            "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl") };
    IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor recordDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    // File scan operator.
    FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), recordDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scanOp, ASTERIX_IDS[0]);
    // Sleep operator.
    SleepOperatorDescriptor sleepOp = new SleepOperatorDescriptor(spec);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sleepOp, ASTERIX_IDS);
    // Sink operator.
    SinkOperatorDescriptor sinkOp = new SinkOperatorDescriptor(spec, 1);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sinkOp, ASTERIX_IDS);
    // Hash-repartitioning connector.
    IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 0 }, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
    spec.connect(conn1, scanOp, 0, sleepOp, 0);
    // One-to-one connector.
    IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
    spec.connect(conn2, sleepOp, 0, sinkOp, 0);
    return spec;
}
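Note the split layout: only ASTERIX_IDS[0] scans a file, while the sleep and sink operators are constrained to every node in ASTERIX_IDS, so the hash connector repartitions one input stream across the whole cluster. If more input partitions existed, the same provider could list one ManagedFileSplit per node; a sketch, assuming ASTERIX_IDS has at least two entries and that the second part file exists on that node:

FileSplit[] multiNodeSplits = new FileSplit[] {
        new ManagedFileSplit(ASTERIX_IDS[0],
                "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"),
        new ManagedFileSplit(ASTERIX_IDS[1],
                "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
IFileSplitProvider multiNodeProvider = new ConstantFileSplitProvider(multiNodeSplits);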
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class HeapSortMergeTest, method optimizedSortMergeTest01.
@Test
public void optimizedSortMergeTest01() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID,
                    "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"),
            new ManagedFileSplit(NC2_ID,
                    "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    // Top-K limit: the sorter emits at most this many records.
    int outputLimit = 5;
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit,
            new int[] { 1, 0 }, null,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    FileSplit fs = createFile(nc1);
    IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(new FileSplit[] { fs });
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|");
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 },
                    new IBinaryHashFunctionFactory[] {
                            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
                            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
            new int[] { 1, 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, printer, 0);
    runTest(spec);
    System.out.println("Result written into: " + fs.getPath() + " on node: " + fs.getNodeName());
}
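The merging connector above routes each tuple by hashing fields 1 and 0 to pick a consumer partition, then merges the incoming streams on the same fields so the writer sees sorted output. The routing idea alone, reduced to plain Java (a sketch, not the FieldHashPartitionComputerFactory implementation):

import java.util.Objects;

public class FieldHashPartitionSketch {
    // A tuple is routed to the consumer chosen by hashing its key fields,
    // mirroring the { 1, 0 } key specification used above.
    static int partitionFor(Object[] tuple, int[] keyFields, int numConsumers) {
        int h = 0;
        for (int f : keyFields) {
            h = 31 * h + Objects.hashCode(tuple[f]);
        }
        return Math.floorMod(h, numConsumers);
    }

    public static void main(String[] args) {
        Object[] order = { "1", "O", "173665.47" };
        System.out.println(partitionFor(order, new int[] { 1, 0 }, 2));
    }
}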
Use of org.apache.hyracks.api.io.FileSplit in project asterixdb by apache.
The class HeapSortMergeTest, method createSortMergeJobSpec.
public static JobSpecification createSortMergeJobSpec() throws Exception {
    JobSpecification spec = new JobSpecification();
    FileSplit[] ordersSplits = new FileSplit[] {
            new ManagedFileSplit(NC1_ID,
                    "data" + File.separator + "tpch0.001" + File.separator + "orders-part1.tbl"),
            new ManagedFileSplit(NC2_ID,
                    "data" + File.separator + "tpch0.001" + File.separator + "orders-part2.tbl") };
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
                    UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
    int outputLimit = 20;
    TopKSorterOperatorDescriptor sorter = new TopKSorterOperatorDescriptor(spec, 4, outputLimit,
            new int[] { 1, 0 }, null,
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
    LimitOperatorDescriptor filter = new LimitOperatorDescriptor(spec, ordersDesc, outputLimit);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, filter, NC1_ID);
    ResultSetId rsId = new ResultSetId(1);
    spec.addResultSetId(rsId);
    IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false, false,
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(new int[] { 1, 0 },
                    new IBinaryHashFunctionFactory[] {
                            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
                            PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
            new int[] { 1, 0 },
            new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
            new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, filter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), filter, 0, printer, 0);
    return spec;
}
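Unlike the test above, createSortMergeJobSpec only builds and returns the specification; a caller runs it separately. A hypothetical caller inside the same harness (runTest is the helper invoked by the other jobs on this page):

JobSpecification spec = HeapSortMergeTest.createSortMergeJobSpec();
runTest(spec);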