Search in sources :

Example 61 with BiConsumer

use of java.util.function.BiConsumer in project elki by elki-project.

the class ComputeKNNOutlierScores method run.

/**
 * Runs a collection of kNN-based outlier detection methods for a range of
 * neighborhood sizes and writes the resulting scores to {@code outfile}.
 *
 * <p>The kNN lists are materialized once up front (unless a preprocessor-backed
 * query is already available) so that every method below can reuse them. The
 * maximum k is capped at {@code relation.size() - 1}. For each method, the
 * k range is handed to {@code runForEachK}, whose output callback writes each
 * result with the configured {@code scaling}.
 *
 * @throws AbortException if no preprocessor-backed kNN query could be obtained
 *         after materialization, or if the output file cannot be created
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
    // Ensure we don't go beyond the relation size:
    final int maxk = Math.min(this.maxk, relation.size() - 1);
    // Number of neighbors to materialize: a little more than maxk, but never
    // more than the number of objects in the relation.
    final int lim = Math.min(maxk + 2, relation.size());
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    // Precompute kNN when the query is not already backed by a preprocessor:
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
        preproc.initialize();
        relation.getHierarchy().add(relation, preproc);
    }
    // Test that we now get a proper index query
    knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
    }
    // Warn for some known slow methods and large k:
    if (!isDisabled("LDOF") && maxk > 100) {
        // Message previously misspelled the algorithm name as "LODF".
        LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
    }
    if (!isDisabled("FastABOD") && maxk > 100) {
        LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
    }
    if (!isDisabled("DWOF") && maxk > 100) {
        LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
    }
    final DBIDs ids = relation.getDBIDs();
    try (PrintStream fout = new PrintStream(outfile)) {
        // Header line: data set size and data type, as a control for the reader.
        fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
        // Label outlier result (reference), written without score scaling.
        writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
        // Starting k: fall back to the step size when no explicit start is set.
        final int startk = (this.startk > 0) ? this.startk : this.stepk;
        // Starting values for methods that are run with k >= 2 resp. k >= 3.
        final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
        final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
        // Output function:
        BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
        // KNN
        runForEachK("KNN", startk, stepk, maxk,
            k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
        // KNN Weight
        runForEachK("KNNW", startk, stepk, maxk,
            k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
        // Run LOF
        runForEachK("LOF", startk, stepk, maxk,
            k -> new LOF<O>(k, distf).run(database, relation), out);
        // Run Simplified-LOF
        runForEachK("SimplifiedLOF", startk, stepk, maxk,
            k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
        // LoOP
        runForEachK("LoOP", startk, stepk, maxk,
            k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
        // LDOF
        runForEachK("LDOF", startkmin2, stepk, maxk,
            k -> new LDOF<O>(distf, k).run(database, relation), out);
        // Run ODIN
        runForEachK("ODIN", startk, stepk, maxk,
            k -> new ODIN<O>(distf, k).run(database, relation), out);
        // Run FastABOD
        runForEachK("FastABOD", startkmin3, stepk, maxk,
            k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
        // Run KDEOS with intrinsic dimensionality 2.
        runForEachK("KDEOS", startkmin2, stepk, maxk,
            k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL,
                0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(),
                2).run(database, relation), out);
        // Run LDF
        runForEachK("LDF", startk, stepk, maxk,
            k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
        // Run INFLO
        runForEachK("INFLO", startk, stepk, maxk,
            k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
        // Run COF
        runForEachK("COF", startk, stepk, maxk,
            k -> new COF<O>(k, distf).run(database, relation), out);
        // Run simple Intrinsic dimensionality
        runForEachK("Intrinsic", startkmin2, stepk, maxk,
            k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
        // Run IDOS
        runForEachK("IDOS", startkmin2, stepk, maxk,
            k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
        // Run simple kernel-density LOF variant
        runForEachK("KDLOF", startkmin2, stepk, maxk,
            k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
        // Run DWOF (need pairwise distances, too)
        runForEachK("DWOF", startkmin2, stepk, maxk,
            k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
        // Run LIC
        runForEachK("LIC", startk, stepk, maxk,
            k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
        // Run VOV (requires a vector field).
        if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
            // The type check above guards both casts.
            @SuppressWarnings("unchecked") final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
            @SuppressWarnings("unchecked") final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
            runForEachK("VOV", startk, stepk, maxk,
                k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
        }
        // Run KNN DD
        runForEachK("KNNDD", startk, stepk, maxk,
            k -> new KNNDD<O>(distf, k).run(database, relation), out);
        // Run KNN SOS
        runForEachK("KNNSOS", startk, stepk, maxk,
            k -> new KNNSOS<O>(distf, k).run(relation), out);
        // Run ISOS
        runForEachK("ISOS", startkmin2, stepk, maxk,
            k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
    } catch (FileNotFoundException e) {
        throw new AbortException("Cannot create output file.", e);
    }
}
Also used : PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) InputStep(de.lmu.ifi.dbs.elki.workflow.InputStep) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Reference(de.lmu.ifi.dbs.elki.utilities.documentation.Reference) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) ObjectParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Locale(java.util.Locale) KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) FastMath(net.jafama.FastMath) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) ByLabelOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.ByLabelOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD) OptionID(de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID) DistanceBasedAlgorithm(de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm) FormatUtil(de.lmu.ifi.dbs.elki.utilities.io.FormatUtil) ScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) TypeUtil(de.lmu.ifi.dbs.elki.data.type.TypeUtil) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) AggregatedHillEstimator(de.lmu.ifi.dbs.elki.math.statistics.intrinsicdimensionality.AggregatedHillEstimator) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) FileNotFoundException(java.io.FileNotFoundException) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 
OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) IntParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) Pattern(java.util.regex.Pattern) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Parameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) QueryUtil(de.lmu.ifi.dbs.elki.database.QueryUtil) BiConsumer(java.util.function.BiConsumer) GaussianKernelDensityFunction(de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction) CommonConstraints(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) IntFunction(java.util.function.IntFunction) PrintStream(java.io.PrintStream) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) AbstractApplication(de.lmu.ifi.dbs.elki.application.AbstractApplication) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) File(java.io.File) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) 
KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) Logging(de.lmu.ifi.dbs.elki.logging.Logging) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) PatternParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) FileNotFoundException(java.io.FileNotFoundException) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) Database(de.lmu.ifi.dbs.elki.database.Database) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) PrintStream(java.io.PrintStream) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) 
KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD)

Example 62 with BiConsumer

use of java.util.function.BiConsumer in project presto by prestodb.

the class TestPinotQueryGenerator method testUnaryAggregationHelper.

/**
 * Checks the Pinot query generated for a single aggregation function over
 * several source plans (plain scan, two filters, a multi-value filter) and
 * grouping variants (global, by region, by region and city).
 *
 * @param aggregationFunctionBuilder callback that adds the aggregation under
 *        test to the supplied aggregation builder
 * @param expectedAggOutput expected aggregation expression, passed through
 *        {@code getExpectedAggOutput} to build the expected SELECT list
 */
private void testUnaryAggregationHelper(BiConsumer<PlanBuilder, PlanBuilder.AggregationBuilder> aggregationFunctionBuilder, String expectedAggOutput) {
    // Source plans shared by the cases below.
    PlanNode scanOnly = buildPlan(pb -> tableScan(pb, pinotTable, regionId, secondsSinceEpoch, city, fare));
    PlanNode fareFilter = buildPlan(pb -> filter(
            pb,
            tableScan(pb, pinotTable, regionId, secondsSinceEpoch, city, fare),
            getRowExpression("fare > 3", defaultSessionHolder)));
    PlanNode rangeFilter = buildPlan(pb -> filter(
            pb,
            tableScan(pb, pinotTable, regionId, secondsSinceEpoch, city, fare),
            getRowExpression("secondssinceepoch between 200 and 300 and regionid >= 40", defaultSessionHolder)));
    PlanNode multiValueFilter = buildPlan(pb -> filter(
            pb,
            tableScan(pb, pinotTable, regionId, secondsSinceEpoch, city, fare, scores),
            getRowExpression("contains(scores, 100) OR contains(scores, 200)", defaultSessionHolder)));

    // Global aggregation over the plain scan.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(scanOnly).globalGrouping())),
            format("SELECT %s FROM realtimeOnly", getExpectedAggOutput(expectedAggOutput, "")));

    // Global aggregation over the fare filter.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(fareFilter).globalGrouping())),
            format("SELECT %s FROM realtimeOnly WHERE (fare > 3)", getExpectedAggOutput(expectedAggOutput, "")));

    // Grouped by region over the fare filter.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(fareFilter).singleGroupingSet(variable("regionid")))),
            format("SELECT %s FROM realtimeOnly WHERE (fare > 3) GROUP BY regionId %s 10000", getExpectedAggOutput(expectedAggOutput, "regionId"), getGroupByLimitKey()));

    // Grouped by region over the plain scan.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(scanOnly).singleGroupingSet(variable("regionid")))),
            format("SELECT %s FROM realtimeOnly GROUP BY regionId %s 10000", getExpectedAggOutput(expectedAggOutput, "regionId"), getGroupByLimitKey()));

    // Grouped by region and city over the range filter.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(rangeFilter).singleGroupingSet(variable("regionid"), variable("city")))),
            format("SELECT %s FROM realtimeOnly WHERE ((secondsSinceEpoch BETWEEN 200 AND 300) AND (regionId >= 40)) GROUP BY regionId, city %s 10000", getExpectedAggOutput(expectedAggOutput, "regionId, city"), getGroupByLimitKey()));

    // Grouped by region and city over the multi-value filter.
    testPinotQuery(
            pb -> pb.aggregation(agg -> aggregationFunctionBuilder.accept(pb, agg.source(multiValueFilter).singleGroupingSet(variable("regionid"), variable("city")))),
            format("SELECT %s FROM realtimeOnly WHERE ((scores = 100) OR (scores = 200)) GROUP BY regionId, city %s 10000", getExpectedAggOutput(expectedAggOutput, "regionId, city"), getGroupByLimitKey()));
}
Also used : Arrays(java.util.Arrays) AggregationNode(com.facebook.presto.spi.plan.AggregationNode) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) Assert.assertEquals(org.testng.Assert.assertEquals) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) Test(org.testng.annotations.Test) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) DistinctLimitNode(com.facebook.presto.spi.plan.DistinctLimitNode) Function(java.util.function.Function) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) ImmutableList(com.google.common.collect.ImmutableList) PinotTableHandle(com.facebook.presto.pinot.PinotTableHandle) Map(java.util.Map) PlanBuilder(com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder) BiConsumer(java.util.function.BiConsumer) OrderingScheme(com.facebook.presto.spi.plan.OrderingScheme) NoSuchElementException(java.util.NoSuchElementException) Assert.assertFalse(org.testng.Assert.assertFalse) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) PinotColumnHandle(com.facebook.presto.pinot.PinotColumnHandle) SortOrder(com.facebook.presto.common.block.SortOrder) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Ordering(com.facebook.presto.spi.plan.Ordering) Set(java.util.Set) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) PlanNode(com.facebook.presto.spi.plan.PlanNode) List(java.util.List) TestPinotQueryBase(com.facebook.presto.pinot.TestPinotQueryBase) ProjectNode(com.facebook.presto.spi.plan.ProjectNode) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) PinotConfig(com.facebook.presto.pinot.PinotConfig) TopNNode(com.facebook.presto.spi.plan.TopNNode) Optional(java.util.Optional) Assert.assertTrue(org.testng.Assert.assertTrue) PlanNode(com.facebook.presto.spi.plan.PlanNode)

Example 63 with BiConsumer

use of java.util.function.BiConsumer in project presto by prestodb.

the class TestDruidQueryGenerator method testCountStar.

/**
 * Verifies the Druid query text generated for {@code count(*)} over a plain
 * scan and two filtered scans, with global grouping, grouping by region,
 * grouping by region and city, and a limited grouped aggregation.
 */
@Test
public void testCountStar() {
    // Adds a single count(*) aggregation named "agg" to the given builder.
    BiConsumer<PlanBuilder, PlanBuilder.AggregationBuilder> countStar = (pb, agg) ->
            agg.addAggregation(pb.variable("agg"), getRowExpression("count(*)", defaultSessionHolder));

    // Source plans shared by the cases below.
    PlanNode scanOnly = buildPlan(pb -> tableScan(pb, druidTable, regionId, secondsSinceEpoch, city, fare));
    PlanNode fareFilter = buildPlan(pb -> filter(
            pb,
            tableScan(pb, druidTable, regionId, secondsSinceEpoch, city, fare),
            getRowExpression("fare > 3", defaultSessionHolder)));
    PlanNode rangeFilter = buildPlan(pb -> filter(
            pb,
            tableScan(pb, druidTable, regionId, secondsSinceEpoch, city, fare),
            getRowExpression("secondssinceepoch between 200 and 300 and \"region.id\" >= 40", defaultSessionHolder)));

    // Global count over the plain scan.
    testDQL(
            pb -> pb.aggregation(agg -> countStar.accept(pb, agg.source(scanOnly).globalGrouping())),
            "SELECT count(*) FROM \"realtimeOnly\"");

    // Global count over the fare filter.
    testDQL(
            pb -> pb.aggregation(agg -> countStar.accept(pb, agg.source(fareFilter).globalGrouping())),
            "SELECT count(*) FROM \"realtimeOnly\" WHERE (\"fare\" > 3)");

    // Count grouped by region over the fare filter.
    testDQL(
            pb -> pb.aggregation(agg -> countStar.accept(pb, agg.source(fareFilter).singleGroupingSet(variable("region.id")))),
            "SELECT \"region.Id\", count(*) FROM \"realtimeOnly\" WHERE (\"fare\" > 3) GROUP BY \"region.Id\"");

    // Count grouped by region over the plain scan.
    testDQL(
            pb -> pb.aggregation(agg -> countStar.accept(pb, agg.source(scanOnly).singleGroupingSet(variable("region.id")))),
            "SELECT \"region.Id\", count(*) FROM \"realtimeOnly\" GROUP BY \"region.Id\"");

    // Same grouped count, wrapped in a limit of 5.
    testDQL(
            pb -> limit(pb, 5L, pb.aggregation(agg -> countStar.accept(pb, agg.source(scanOnly).singleGroupingSet(variable("region.id"))))),
            "SELECT \"region.Id\", count(*) FROM \"realtimeOnly\" GROUP BY \"region.Id\" LIMIT 5");

    // Count grouped by region and city over the range filter.
    testDQL(
            pb -> pb.aggregation(agg -> countStar.accept(pb, agg.source(rangeFilter).singleGroupingSet(variable("region.id"), variable("city")))),
            "SELECT \"region.Id\", \"city\", count(*) FROM \"realtimeOnly\" WHERE ((\"secondsSinceEpoch\" BETWEEN 200 AND 300) AND (\"region.Id\" >= 40)) GROUP BY \"region.Id\", \"city\"");
}
Also used : PlanNode(com.facebook.presto.spi.plan.PlanNode) ImmutableList(com.google.common.collect.ImmutableList) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map) PlanBuilder(com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder) BiConsumer(java.util.function.BiConsumer) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) PlanNode(com.facebook.presto.spi.plan.PlanNode) PlanBuilder(com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder) Test(org.testng.annotations.Test)

Example 64 with BiConsumer

use of java.util.function.BiConsumer in project presto by prestodb.

the class TestBlockRetainedSizeBreakdown method checkRetainedSize.

/**
 * Verifies that the per-part sizes reported by
 * {@code retainedBytesForEachPart} add up to {@code getRetainedSizeInBytes()},
 * both for the block itself and for a region taken from it.
 *
 * @param block the block under test
 * @param getRegionCreateNewObjects expected sharing behavior of
 *        {@code getRegion}: when {@code true} every remaining tracked part
 *        must have been reported exactly once, otherwise exactly twice
 */
private static void checkRetainedSize(Block block, boolean getRegionCreateNewObjects) {
    final AtomicLong reportedBytes = new AtomicLong();
    final Object2LongOpenCustomHashMap<Object> reportCounts = new Object2LongOpenCustomHashMap<>(new ObjectStrategy());
    // Sums the reported sizes and counts how often each part is reported.
    final BiConsumer<Object, Long> tracker = (part, bytes) -> {
        reportedBytes.addAndGet(bytes);
        reportCounts.addTo(part, 1);
    };

    // The parts of the original block must account for its retained size.
    block.retainedBytesForEachPart(tracker);
    assertEquals(reportedBytes.get(), block.getRetainedSizeInBytes());

    // A region over the first half of the positions is accumulated on top.
    final Block region = block.getRegion(0, block.getPositionCount() / 2);
    region.retainedBytesForEachPart(tracker);
    assertEquals(reportedBytes.get(), block.getRetainedSizeInBytes() + region.getRetainedSizeInBytes());

    // Each block object itself is reported exactly once.
    assertEquals(reportCounts.getLong(block), 1);
    assertEquals(reportCounts.getLong(region), 1);

    // Drop the block objects; what remains are their constituent parts.
    reportCounts.remove(block);
    reportCounts.remove(region);
    final long expectedReports = getRegionCreateNewObjects ? 1 : 2;
    for (long reports : reportCounts.values()) {
        assertEquals(reports, expectedReports);
    }
}
Also used : Object2LongOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) TypeUtils.writeNativeValue(com.facebook.presto.common.type.TypeUtils.writeNativeValue) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) Object2LongOpenCustomHashMap(it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap) DynamicSliceOutput(io.airlift.slice.DynamicSliceOutput) AtomicLong(java.util.concurrent.atomic.AtomicLong) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) BiConsumer(java.util.function.BiConsumer) Strategy(it.unimi.dsi.fastutil.Hash.Strategy) Optional(java.util.Optional) Type(com.facebook.presto.common.type.Type) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 65 with BiConsumer

use of java.util.function.BiConsumer in project presto by prestodb.

the class TestAddExchangesPlans method assertPlanWithMergePartitionStrategy.

/**
 * Plans {@code sql} under the given aggregation partitioning merging
 * strategy (with task concurrency set to "2"), matches the plan against
 * {@code pattern}, and additionally checks the number of remote-streaming
 * repartition exchanges in the plan.
 *
 * @param sql the query to plan
 * @param partitionMergingStrategy value for the
 *        {@code AGGREGATION_PARTITIONING_MERGING_STRATEGY} session property
 * @param remoteRepartitionExchangeCount expected number of exchange nodes
 *        with scope {@code REMOTE_STREAMING} and type {@code REPARTITION}
 * @param pattern the plan pattern handed to {@code assertPlanWithSession}
 */
private void assertPlanWithMergePartitionStrategy(String sql, String partitionMergingStrategy, int remoteRepartitionExchangeCount, PlanMatchPattern pattern) {
    Session strategySession = Session.builder(this.getQueryRunner().getDefaultSession())
            .setSystemProperty(AGGREGATION_PARTITIONING_MERGING_STRATEGY, partitionMergingStrategy)
            .setSystemProperty(TASK_CONCURRENCY, "2")
            .build();

    // Counts the remote-streaming repartition exchanges and compares with the expectation.
    BiConsumer<Plan, Integer> exchangeCountCheck = (actualPlan, expectedCount) -> assertEquals(
            searchFrom(actualPlan.getRoot())
                    .where(candidate -> candidate instanceof ExchangeNode
                            && ((ExchangeNode) candidate).getScope() == REMOTE_STREAMING
                            && ((ExchangeNode) candidate).getType() == REPARTITION)
                    .count(),
            expectedCount.intValue());

    assertPlanWithSession(
            sql,
            strategySession,
            false,
            pattern,
            plannedQuery -> exchangeCountCheck.accept(plannedQuery, remoteRepartitionExchangeCount));
}
Also used : ALL(com.facebook.presto.execution.QueryManagerConfig.ExchangeMaterializationStrategy.ALL) AggregationNode(com.facebook.presto.spi.plan.AggregationNode) BasePlanTest(com.facebook.presto.sql.planner.assertions.BasePlanTest) PlanMatchPattern.anyTree(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree) GroupIdNode(com.facebook.presto.sql.planner.plan.GroupIdNode) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) SINGLE(com.facebook.presto.spi.plan.AggregationNode.Step.SINGLE) TestingSession(com.facebook.presto.testing.TestingSession) PlanMatchPattern(com.facebook.presto.sql.planner.assertions.PlanMatchPattern) PlanMatchPattern.join(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.join) PlanMatchPattern.singleGroupingSet(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.singleGroupingSet) AGGREGATION_PARTITIONING_MERGING_STRATEGY(com.facebook.presto.SystemSessionProperties.AGGREGATION_PARTITIONING_MERGING_STRATEGY) ImmutableList(com.google.common.collect.ImmutableList) PREFER_EXACT_PARTITIONING(com.facebook.presto.sql.analyzer.FeaturesConfig.PartitioningPrecisionStrategy.PREFER_EXACT_PARTITIONING) LOCAL(com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.LOCAL) REPARTITION(com.facebook.presto.sql.planner.plan.ExchangeNode.Type.REPARTITION) PlanMatchPattern.equiJoinClause(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.equiJoinClause) BiConsumer(java.util.function.BiConsumer) Plan(com.facebook.presto.sql.planner.Plan) PlanMatchPattern.aggregation(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.aggregation) PlanNodeSearcher.searchFrom(com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom) FunctionCall(com.facebook.presto.sql.tree.FunctionCall) EXCHANGE_MATERIALIZATION_STRATEGY(com.facebook.presto.SystemSessionProperties.EXCHANGE_MATERIALIZATION_STRATEGY) ImmutableMap(com.google.common.collect.ImmutableMap) 
PlanMatchPattern.semiJoin(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.semiJoin) Session(com.facebook.presto.Session) ELIMINATE_CROSS_JOINS(com.facebook.presto.sql.analyzer.FeaturesConfig.JoinReorderingStrategy.ELIMINATE_CROSS_JOINS) PlanMatchPattern.tableScan(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.tableScan) PlanMatchPattern.anySymbol(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anySymbol) PlanMatchPattern.exchange(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.exchange) JOIN_DISTRIBUTION_TYPE(com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) USE_STREAMING_EXCHANGE_FOR_MARK_DISTINCT(com.facebook.presto.SystemSessionProperties.USE_STREAMING_EXCHANGE_FOR_MARK_DISTINCT) PARTITIONING_PRECISION_STRATEGY(com.facebook.presto.SystemSessionProperties.PARTITIONING_PRECISION_STRATEGY) TASK_CONCURRENCY(com.facebook.presto.SystemSessionProperties.TASK_CONCURRENCY) PARTITIONED(com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.PARTITIONED) REMOTE_STREAMING(com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.REMOTE_STREAMING) JOIN_REORDERING_STRATEGY(com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY) REMOTE_MATERIALIZED(com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.REMOTE_MATERIALIZED) Optional(java.util.Optional) PlanMatchPattern.values(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values) ExpectedValueProvider(com.facebook.presto.sql.planner.assertions.ExpectedValueProvider) ExchangeNode(com.facebook.presto.sql.planner.plan.ExchangeNode) PlanMatchPattern.node(com.facebook.presto.sql.planner.assertions.PlanMatchPattern.node) INNER(com.facebook.presto.sql.planner.plan.JoinNode.Type.INNER) ExchangeNode(com.facebook.presto.sql.planner.plan.ExchangeNode) Plan(com.facebook.presto.sql.planner.Plan) TestingSession(com.facebook.presto.testing.TestingSession) Session(com.facebook.presto.Session)

Aggregations

BiConsumer (java.util.function.BiConsumer)255 Test (org.junit.Test)110 List (java.util.List)106 Map (java.util.Map)77 IOException (java.io.IOException)75 Consumer (java.util.function.Consumer)69 ArrayList (java.util.ArrayList)68 HashMap (java.util.HashMap)64 Collectors (java.util.stream.Collectors)53 CountDownLatch (java.util.concurrent.CountDownLatch)52 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)50 Collections (java.util.Collections)46 Set (java.util.Set)46 Collection (java.util.Collection)45 Arrays (java.util.Arrays)44 TimeUnit (java.util.concurrent.TimeUnit)43 Assert (org.junit.Assert)43 Function (java.util.function.Function)41 Optional (java.util.Optional)40 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)35