Search in sources:

Example 1 with Evaluator

use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.

the class SparkTable method canDeleteUsingMetadata.

// a metadata delete is possible iff matching files can be deleted entirely
private boolean canDeleteUsingMetadata(Expression deleteExpr) {
    boolean caseSensitive = Boolean.parseBoolean(sparkSession().conf().get("spark.sql.caseSensitive"));
    TableScan scan = table().newScan().filter(deleteExpr).caseSensitive(caseSensitive).includeColumnStats().ignoreResiduals();
    try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
        Map<Integer, Evaluator> evaluators = Maps.newHashMap();
        StrictMetricsEvaluator metricsEvaluator = new StrictMetricsEvaluator(table().schema(), deleteExpr);
        return Iterables.all(tasks, task -> {
            DataFile file = task.file();
            PartitionSpec spec = task.spec();
            Evaluator evaluator = evaluators.computeIfAbsent(spec.specId(), specId -> new Evaluator(spec.partitionType(), Projections.strict(spec).project(deleteExpr)));
            return evaluator.eval(file.partition()) || metricsEvaluator.eval(file);
        });
    } catch (IOException ioe) {
        LOG.warn("Failed to close task iterable", ioe);
        return false;
    }
}
Also used : DataFile(org.apache.iceberg.DataFile) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) FileScanTask(org.apache.iceberg.FileScanTask) Evaluator(org.apache.iceberg.expressions.Evaluator) StrictMetricsEvaluator(org.apache.iceberg.expressions.StrictMetricsEvaluator) PartitionSpec(org.apache.iceberg.PartitionSpec)

Example 2 with Evaluator

use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.

the class BaseOverwriteFiles method validate.

@Override
protected void validate(TableMetadata base) {
    if (validateAddedFilesMatchOverwriteFilter) {
        PartitionSpec spec = dataSpec();
        Expression rowFilter = rowFilter();
        Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter);
        Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr);
        Expression strictExpr = Projections.strict(spec).project(rowFilter);
        Evaluator strict = new Evaluator(spec.partitionType(), strictExpr);
        StrictMetricsEvaluator metrics = new StrictMetricsEvaluator(base.schema(), rowFilter, isCaseSensitive());
        for (DataFile file : addedFiles()) {
            // the real test is that the strict or metrics test matches the file, indicating that all
            // records in the file match the filter. inclusive is used to avoid testing the metrics,
            // which is more complicated
            ValidationException.check(inclusive.eval(file.partition()) && (strict.eval(file.partition()) || metrics.eval(file)), "Cannot append file with rows that do not match filter: %s: %s", rowFilter, file.path());
        }
    }
    if (validateNewDataFiles) {
        validateAddedDataFiles(base, startingSnapshotId, dataConflictDetectionFilter());
    }
    if (validateNewDeletes) {
        if (rowFilter() != Expressions.alwaysFalse()) {
            Expression filter = conflictDetectionFilter != null ? conflictDetectionFilter : rowFilter();
            validateNoNewDeleteFiles(base, startingSnapshotId, filter);
            validateDeletedDataFiles(base, startingSnapshotId, filter);
        }
        if (deletedDataFiles.size() > 0) {
            validateNoNewDeletesForDataFiles(base, startingSnapshotId, conflictDetectionFilter, deletedDataFiles);
        }
    }
}
Also used : Expression(org.apache.iceberg.expressions.Expression) Evaluator(org.apache.iceberg.expressions.Evaluator) StrictMetricsEvaluator(org.apache.iceberg.expressions.StrictMetricsEvaluator)

Example 3 with Evaluator

use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.

the class TestStartsWith method testTruncateString.

@Test
public void testTruncateString() {
    Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
    Expression expr = startsWith(COLUMN, "abcde");
    BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);
    UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
    Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
    Assert.assertTrue("startsWith(abcde, truncate(abcdg,2))  => true", evaluator.eval(TestHelpers.Row.of("abcdg")));
}
Also used : Expression(org.apache.iceberg.expressions.Expression) BoundPredicate(org.apache.iceberg.expressions.BoundPredicate) Evaluator(org.apache.iceberg.expressions.Evaluator) Test(org.junit.Test)

Example 4 with Evaluator

use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.

the class TestNotStartsWith method testTruncateStringWhenProjectedPredicateTermIsShorterThanWidth.

@Test
public void testTruncateStringWhenProjectedPredicateTermIsShorterThanWidth() {
    Truncate<String> trunc = Truncate.get(Types.StringType.get(), 16);
    UnboundPredicate<String> expr = notStartsWith(COLUMN, "ab");
    BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);
    UnboundPredicate<String> projected = trunc.projectStrict(COLUMN, boundExpr);
    Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
    Assert.assertEquals("The projected literal should not be truncated as its size is shorter than truncation width", projected.literal().value(), "ab");
    Assert.assertFalse("notStartsWith(ab, truncate(abcde, 16)) => false", evaluator.eval(TestHelpers.Row.of("abcde")));
    Assert.assertFalse("notStartsWith(ab, truncate(ab, 16)) => false", evaluator.eval(TestHelpers.Row.of("ab")));
    Assert.assertTrue("notStartsWith(ab, truncate(a, 16)) => true", evaluator.eval(TestHelpers.Row.of("a")));
}
Also used : BoundPredicate(org.apache.iceberg.expressions.BoundPredicate) InclusiveMetricsEvaluator(org.apache.iceberg.expressions.InclusiveMetricsEvaluator) StrictMetricsEvaluator(org.apache.iceberg.expressions.StrictMetricsEvaluator) Evaluator(org.apache.iceberg.expressions.Evaluator) Test(org.junit.Test)

Example 5 with Evaluator

use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.

the class ManifestGroup method entries.

private <T> Iterable<CloseableIterable<T>> entries(BiFunction<ManifestFile, CloseableIterable<ManifestEntry<DataFile>>, CloseableIterable<T>> entryFn) {
    LoadingCache<Integer, ManifestEvaluator> evalCache = specsById == null ? null : Caffeine.newBuilder().build(specId -> {
        PartitionSpec spec = specsById.get(specId);
        return ManifestEvaluator.forPartitionFilter(Expressions.and(partitionFilter, Projections.inclusive(spec, caseSensitive).project(dataFilter)), spec, caseSensitive);
    });
    Evaluator evaluator;
    if (fileFilter != null && fileFilter != Expressions.alwaysTrue()) {
        evaluator = new Evaluator(DataFile.getType(EMPTY_STRUCT), fileFilter, caseSensitive);
    } else {
        evaluator = null;
    }
    Iterable<ManifestFile> matchingManifests = evalCache == null ? dataManifests : Iterables.filter(dataManifests, manifest -> evalCache.get(manifest.partitionSpecId()).eval(manifest));
    if (ignoreDeleted) {
        // only scan manifests that have entries other than deletes
        // remove any manifests that don't have any existing or added files. if either the added or
        // existing files count is missing, the manifest must be scanned.
        matchingManifests = Iterables.filter(matchingManifests, manifest -> manifest.hasAddedFiles() || manifest.hasExistingFiles());
    }
    if (ignoreExisting) {
        // only scan manifests that have entries other than existing
        // remove any manifests that don't have any deleted or added files. if either the added or
        // deleted files count is missing, the manifest must be scanned.
        matchingManifests = Iterables.filter(matchingManifests, manifest -> manifest.hasAddedFiles() || manifest.hasDeletedFiles());
    }
    matchingManifests = Iterables.filter(matchingManifests, manifestPredicate::test);
    return Iterables.transform(matchingManifests, manifest -> {
        ManifestReader<DataFile> reader = ManifestFiles.read(manifest, io, specsById).filterRows(dataFilter).filterPartitions(partitionFilter).caseSensitive(caseSensitive).select(columns);
        CloseableIterable<ManifestEntry<DataFile>> entries = reader.entries();
        if (ignoreDeleted) {
            entries = reader.liveEntries();
        }
        if (ignoreExisting) {
            entries = CloseableIterable.filter(entries, entry -> entry.status() != ManifestEntry.Status.EXISTING);
        }
        if (evaluator != null) {
            entries = CloseableIterable.filter(entries, entry -> evaluator.eval((GenericDataFile) entry.file()));
        }
        entries = CloseableIterable.filter(entries, manifestEntryPredicate);
        return entryFn.apply(manifest, entries);
    });
}
Also used : Caffeine(com.github.benmanes.caffeine.cache.Caffeine) Types(org.apache.iceberg.types.Types) CloseableIterable(org.apache.iceberg.io.CloseableIterable) LoadingCache(com.github.benmanes.caffeine.cache.LoadingCache) Predicate(java.util.function.Predicate) ManifestEvaluator(org.apache.iceberg.expressions.ManifestEvaluator) BiFunction(java.util.function.BiFunction) Set(java.util.Set) Iterables(org.apache.iceberg.relocated.com.google.common.collect.Iterables) Evaluator(org.apache.iceberg.expressions.Evaluator) Sets(org.apache.iceberg.relocated.com.google.common.collect.Sets) List(java.util.List) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) Expression(org.apache.iceberg.expressions.Expression) ResidualEvaluator(org.apache.iceberg.expressions.ResidualEvaluator) Map(java.util.Map) Projections(org.apache.iceberg.expressions.Projections) Expressions(org.apache.iceberg.expressions.Expressions) FileIO(org.apache.iceberg.io.FileIO) ParallelIterable(org.apache.iceberg.util.ParallelIterable) ExecutorService(java.util.concurrent.ExecutorService)

Aggregations

Evaluator (org.apache.iceberg.expressions.Evaluator): 11; Expression (org.apache.iceberg.expressions.Expression): 5; InclusiveMetricsEvaluator (org.apache.iceberg.expressions.InclusiveMetricsEvaluator): 5; StrictMetricsEvaluator (org.apache.iceberg.expressions.StrictMetricsEvaluator): 5; BoundPredicate (org.apache.iceberg.expressions.BoundPredicate): 4; Test (org.junit.Test): 4; IOException (java.io.IOException): 2; List (java.util.List): 2; Map (java.util.Map): 2; Set (java.util.Set): 2; FileScanTask (org.apache.iceberg.FileScanTask): 2; PartitionSpec (org.apache.iceberg.PartitionSpec): 2; TableScan (org.apache.iceberg.TableScan): 2; Expressions (org.apache.iceberg.expressions.Expressions): 2; Projections (org.apache.iceberg.expressions.Projections): 2; CloseableIterable (org.apache.iceberg.io.CloseableIterable): 2; Lists (org.apache.iceberg.relocated.com.google.common.collect.Lists): 2; Sets (org.apache.iceberg.relocated.com.google.common.collect.Sets): 2; Caffeine (com.github.benmanes.caffeine.cache.Caffeine): 1; LoadingCache (com.github.benmanes.caffeine.cache.LoadingCache): 1