Search in sources:

Example 31 with AlgebricksPartitionConstraint

use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

the class SinkWritePOperator method contributeRuntimeOperator.

/**
 * Contributes the runtime for a {@link WriteOperator} to the job being built.
 *
 * <p>Each expression of the write operator must be a variable reference; it is
 * resolved to its column position in the first input schema. The resulting
 * column list, together with printer factories and the input record
 * descriptor, is handed to the metadata provider to obtain the write-file
 * runtime, which is then registered with the builder and wired to the
 * operator's first input.
 *
 * @param builder          job builder receiving the micro operator and graph edge
 * @param context          job generation context (type environments, metadata provider)
 * @param op               the logical operator; cast to {@link WriteOperator}
 * @param propagatedSchema schema used to build the output record descriptor
 * @param inputSchemas     input schemas; only element 0 is read
 * @param outerPlanSchema  not read by this method
 * @throws AlgebricksException    declared by the signature; propagated from the helpers called here
 * @throws NotImplementedException if any written expression is not a variable reference
 */
@Override
public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema) throws AlgebricksException {
    WriteOperator writeOp = (WriteOperator) op;
    IOperatorSchema inputSchema = inputSchemas[0];

    // Map every written expression to its column index in the input schema.
    int[] columns = new int[writeOp.getExpressions().size()];
    int next = 0;
    for (Mutable<ILogicalExpression> ref : writeOp.getExpressions()) {
        ILogicalExpression candidate = ref.getValue();
        boolean isVariable = candidate.getExpressionTag() == LogicalExpressionTag.VARIABLE;
        if (!isVariable) {
            throw new NotImplementedException("Only writing variable expressions is supported.");
        }
        LogicalVariable variable = ((VariableReferenceExpression) candidate).getVariableReference();
        columns[next] = inputSchema.findVariable(variable);
        next++;
    }

    // Descriptors and printers needed by the write-file runtime.
    RecordDescriptor outputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
    RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchema, context);
    IPrinterFactory[] printers = JobGenHelper.mkPrinterFactories(inputSchema, context.getTypeEnvironment(op), context, columns);

    IMetadataProvider<?, ?> metadata = context.getMetadataProvider();
    Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> writeRuntime = metadata.getWriteFileRuntime(writeOp.getDataSink(), columns, printers, inputDesc);

    // Register the runtime with its partition constraint, then connect it to its producer.
    builder.contributeMicroOperator(writeOp, writeRuntime.first, outputDesc, writeRuntime.second);
    ILogicalOperator producer = writeOp.getInputs().get(0).getValue();
    builder.contributeGraphEdge(producer, 0, writeOp, 0);
}
Also used : LogicalVariable(org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) NotImplementedException(org.apache.hyracks.algebricks.common.exceptions.NotImplementedException) ILogicalOperator(org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) ILogicalExpression(org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression) IPrinterFactory(org.apache.hyracks.algebricks.data.IPrinterFactory) VariableReferenceExpression(org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression) WriteOperator(org.apache.hyracks.algebricks.core.algebra.operators.logical.WriteOperator) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)

Example 32 with AlgebricksPartitionConstraint

use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

the class PigletMetadataProvider method getWriteFileRuntime.

/**
 * Builds the push-runtime factory and partition constraint for writing to a
 * {@link PigletFileDataSink}.
 *
 * <p>The partition constraint lists the node name of every file split of the
 * sink, in order. The writer factory is created with the file of the first
 * split only, so the sink is expected to expose at least one split (an empty
 * array fails on the {@code [0]} access).
 *
 * @param sink             the data sink; cast to {@link PigletFileDataSink}
 * @param printColumns     column indexes forwarded to the writer factory
 * @param printerFactories printer factories forwarded to the writer factory
 * @param inputDesc        input record descriptor forwarded to the writer factory
 * @return the writer runtime factory paired with an absolute partition constraint
 * @throws AlgebricksException wrapping any {@link HyracksDataException} raised while
 *                             creating the writer factory
 */
@Override
public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(IDataSink sink, int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) throws AlgebricksException {
    FileSplit[] splits = ((PigletFileDataSink) sink).getFileSplits();

    // One location entry per split, taken from the split's node name.
    String[] nodeNames = new String[splits.length];
    int slot = 0;
    for (FileSplit split : splits) {
        nodeNames[slot++] = split.getNodeName();
    }

    try {
        IPushRuntimeFactory writerFactory = new SinkWriterRuntimeFactory(printColumns, printerFactories, splits[0].getFile(null), PrinterBasedWriterFactory.INSTANCE, inputDesc);
        return new Pair<>(writerFactory, new AlgebricksAbsolutePartitionConstraint(nodeNames));
    } catch (HyracksDataException e) {
        throw new AlgebricksException(e);
    }
}
Also used : SinkWriterRuntimeFactory(org.apache.hyracks.algebricks.runtime.operators.std.SinkWriterRuntimeFactory) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) AlgebricksException(org.apache.hyracks.algebricks.common.exceptions.AlgebricksException) FileSplit(org.apache.hyracks.api.io.FileSplit) IPushRuntimeFactory(org.apache.hyracks.algebricks.runtime.base.IPushRuntimeFactory) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 33 with AlgebricksPartitionConstraint

use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

the class JobBuilder method setPartitionConstraintsBottomup.

/**
 * Recursively derives and records the partition constraint of the operator
 * identified by {@code opId}, visiting the operators that feed it first.
 *
 * <p>For each input connector the producing operator is processed before its
 * constraint is consulted. A {@code ONE} target constraint on the connector
 * selects {@code countOneLocation}; {@code SAME_COUNT} copies whatever is
 * currently recorded for the producer. When several inputs carry a target
 * constraint, the last one iterated wins. If the operator already has an entry
 * in {@code partitionConstraintMap}, nothing is written for it.
 *
 * @param opId           id of the operator to process
 * @param tgtConstraints target constraints keyed by connector
 * @param parentOp       not read by this method; recursive calls pass the current operator here
 * @param finalPass      when {@code true}, an operator still lacking a constraint gets a default:
 *                       {@code countOneLocation} if it has no inputs, otherwise {@code clusterLocations}
 */
private void setPartitionConstraintsBottomup(OperatorDescriptorId opId, Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp, boolean finalPass) {
    List<IConnectorDescriptor> inputConns = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint derived = null;
    IOperatorDescriptor current = jobSpec.getOperatorMap().get(opId);

    if (inputConns != null) {
        for (IConnectorDescriptor conn : inputConns) {
            IOperatorDescriptor producer = jobSpec.getConnectorOperatorMap().get(conn.getConnectorId()).getLeft().getLeft();
            // Visit the producer first so its constraint is settled before it is read below.
            setPartitionConstraintsBottomup(producer.getOperatorId(), tgtConstraints, current, finalPass);

            TargetConstraint target = tgtConstraints.get(conn);
            if (target == TargetConstraint.ONE) {
                derived = countOneLocation;
            } else if (target == TargetConstraint.SAME_COUNT) {
                derived = partitionConstraintMap.get(producer);
            }
        }
    }

    // An operator that already has a constraint is left untouched.
    if (partitionConstraintMap.get(current) != null) {
        return;
    }

    // Final-pass defaults for operators that are still unconstrained.
    if (finalPass && derived == null) {
        if (inputConns == null || inputConns.isEmpty()) {
            derived = countOneLocation;
        }
        if (derived == null) {
            derived = clusterLocations;
        }
    }

    // Sets up the location constraint.
    if (derived != null) {
        partitionConstraintMap.put(current, derived);
        AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, current, derived);
    }
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Example 34 with AlgebricksPartitionConstraint

use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

the class JobBuilder method contributeAlgebricksPartitionConstraint.

/**
 * Records the partition constraint for an operator descriptor.
 *
 * <p>A {@code COUNT} constraint whose count is exactly 1 is replaced by the
 * {@code countOneLocation} field before being stored; any other constraint is
 * stored as given.
 *
 * @param opDesc the operator descriptor the constraint applies to
 * @param apcArg the constraint contributed by the caller; must not be {@code null}
 */
@Override
public void contributeAlgebricksPartitionConstraint(IOperatorDescriptor opDesc, AlgebricksPartitionConstraint apcArg) {
    AlgebricksPartitionConstraint effective = apcArg;
    // The cast is only evaluated once the constraint type is known to be COUNT.
    if (apcArg.getPartitionConstraintType() == PartitionConstraintType.COUNT
            && ((AlgebricksCountPartitionConstraint) apcArg).getCount() == 1) {
        effective = countOneLocation;
    }
    partitionConstraintMap.put(opDesc, effective);
}
Also used : AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksCountPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint)

Example 35 with AlgebricksPartitionConstraint

use of org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint in project asterixdb by apache.

the class StoragePathUtil method splitProviderAndPartitionConstraints.

/**
 * Creates a file split provider over the given splits together with an
 * absolute partition constraint naming the node of each split.
 *
 * @param splits the file splits; the i-th location in the constraint is the
 *               node name of the i-th split
 * @return the split provider paired with the partition constraint
 */
public static Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitProviderAndPartitionConstraints(FileSplit[] splits) {
    IFileSplitProvider provider = new ConstantFileSplitProvider(splits);

    String[] nodeNames = new String[splits.length];
    int slot = 0;
    for (FileSplit split : splits) {
        nodeNames[slot++] = split.getNodeName();
    }

    AlgebricksPartitionConstraint constraint = new AlgebricksAbsolutePartitionConstraint(nodeNames);
    return new Pair<>(provider, constraint);
}
Also used : AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksPartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint) AlgebricksAbsolutePartitionConstraint(org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint) Pair(org.apache.hyracks.algebricks.common.utils.Pair)

Aggregations

AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)58 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)30 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)23 JobSpecification (org.apache.hyracks.api.job.JobSpecification)23 IIndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory)22 IndexDataflowHelperFactory (org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory)22 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)20 AlgebricksException (org.apache.hyracks.algebricks.common.exceptions.AlgebricksException)19 Pair (org.apache.hyracks.algebricks.common.utils.Pair)19 Index (org.apache.asterix.metadata.entities.Index)15 MetadataException (org.apache.asterix.metadata.MetadataException)14 AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)14 ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)14 Dataset (org.apache.asterix.metadata.entities.Dataset)12 LogicalVariable (org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable)11 IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)10 DatasetCardinalityHint (org.apache.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint)9 AsterixException (org.apache.asterix.common.exceptions.AsterixException)8 IDataSourceIndex (org.apache.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex)8 IMetadataProvider (org.apache.hyracks.algebricks.core.algebra.metadata.IMetadataProvider)8