Use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.
The class SparkTable, method canDeleteUsingMetadata.
// a metadata delete is possible iff matching files can be deleted entirely
private boolean canDeleteUsingMetadata(Expression deleteExpr) {
  boolean caseSensitive = Boolean.parseBoolean(sparkSession().conf().get("spark.sql.caseSensitive"));
  TableScan scan = table().newScan()
      .filter(deleteExpr)
      .caseSensitive(caseSensitive)
      .includeColumnStats()
      .ignoreResiduals();

  try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
    Map<Integer, Evaluator> evaluators = Maps.newHashMap();
    StrictMetricsEvaluator metricsEvaluator = new StrictMetricsEvaluator(table().schema(), deleteExpr);

    return Iterables.all(tasks, task -> {
      DataFile file = task.file();
      PartitionSpec spec = task.spec();
      Evaluator evaluator = evaluators.computeIfAbsent(
          spec.specId(),
          specId -> new Evaluator(spec.partitionType(), Projections.strict(spec).project(deleteExpr)));
      return evaluator.eval(file.partition()) || metricsEvaluator.eval(file);
    });

  } catch (IOException ioe) {
    LOG.warn("Failed to close task iterable", ioe);
    return false;
  }
}
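The key piece above is the strict projection: a file can be dropped on metadata alone only when its partition tuple proves that every row matches the delete filter; otherwise the strict metrics evaluator gets a second chance. A minimal, self-contained sketch of that check, assuming a hypothetical identity-partitioned schema (the field ids, names, and the inline StructLike are illustrative, not part of the snippet above):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.types.Types;

public class StrictProjectionSketch {
  // minimal single-field partition tuple; real code gets this from DataFile.partition()
  private static StructLike partitionOf(String value) {
    return new StructLike() {
      @Override public int size() { return 1; }
      @Override public <T> T get(int pos, Class<T> javaClass) { return javaClass.cast(value); }
      @Override public <T> void set(int pos, T newValue) { throw new UnsupportedOperationException(); }
    };
  }

  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.required(2, "category", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build();

    Expression deleteExpr = Expressions.equal("category", "books");
    // strict projection matches a partition only if EVERY row in it satisfies deleteExpr
    Evaluator strict = new Evaluator(spec.partitionType(), Projections.strict(spec).project(deleteExpr));

    System.out.println(strict.eval(partitionOf("books")));  // true: whole file is deletable
    System.out.println(strict.eval(partitionOf("toys")));   // false: fall back to metrics or a rewrite
  }
}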
Use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.
The class BaseOverwriteFiles, method validate.
@Override
protected void validate(TableMetadata base) {
  if (validateAddedFilesMatchOverwriteFilter) {
    PartitionSpec spec = dataSpec();
    Expression rowFilter = rowFilter();

    Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter);
    Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr);

    Expression strictExpr = Projections.strict(spec).project(rowFilter);
    Evaluator strict = new Evaluator(spec.partitionType(), strictExpr);

    StrictMetricsEvaluator metrics = new StrictMetricsEvaluator(base.schema(), rowFilter, isCaseSensitive());

    for (DataFile file : addedFiles()) {
      // the real test is that the strict or metrics test matches the file, indicating that all
      // records in the file match the filter. inclusive is used to avoid testing the metrics,
      // which is more complicated
      ValidationException.check(
          inclusive.eval(file.partition()) && (strict.eval(file.partition()) || metrics.eval(file)),
          "Cannot append file with rows that do not match filter: %s: %s",
          rowFilter, file.path());
    }
  }

  if (validateNewDataFiles) {
    validateAddedDataFiles(base, startingSnapshotId, dataConflictDetectionFilter());
  }

  if (validateNewDeletes) {
    if (rowFilter() != Expressions.alwaysFalse()) {
      Expression filter = conflictDetectionFilter != null ? conflictDetectionFilter : rowFilter();
      validateNoNewDeleteFiles(base, startingSnapshotId, filter);
      validateDeletedDataFiles(base, startingSnapshotId, filter);
    }

    if (deletedDataFiles.size() > 0) {
      validateNoNewDeletesForDataFiles(base, startingSnapshotId, conflictDetectionFilter, deletedDataFiles);
    }
  }
}
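The three evaluators split the work: inclusive projection cheaply rejects files whose partitions cannot contain matching rows, strict projection proves that all rows match, and the metrics evaluator covers files where strict projection is inconclusive. Strict projection is inconclusive for transforms like bucket, as this sketch shows (the schema and filter are hypothetical):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.types.Types;

public class ProjectionDivergenceSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("id", 16).build();

    Expression rowFilter = Expressions.equal("id", 5L);

    // inclusive: the bucket holding id=5 MAY contain matching rows, so this
    // should print an equality predicate on the id_bucket partition field
    System.out.println(Projections.inclusive(spec).project(rowFilter));
    // strict: no bucket guarantees ALL of its rows have id=5, so this prints "false",
    // which is why validate() falls back to the metrics evaluator
    System.out.println(Projections.strict(spec).project(rowFilter));
  }
}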
Use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.
The class TestStartsWith, method testTruncateString.
@Test
public void testTruncateString() {
  Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
  Expression expr = startsWith(COLUMN, "abcde");
  BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);

  UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
  Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);

  Assert.assertTrue("startsWith(abcde, truncate(abcdg,2)) => true",
      evaluator.eval(TestHelpers.Row.of("abcdg")));
}
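Truncate.project is reachable here only because the test lives in the org.apache.iceberg.transforms package; the same inclusive projection is available through the public API by declaring a truncate partition. A sketch under that assumption (the schema and column name are illustrative):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.types.Types;

public class StartsWithProjectionSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Types.NestedField.required(1, "s", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("s", 2).build();

    // startsWith("abcde") projects onto the two-character partition value with
    // its literal truncated to "ab", mirroring the test above
    Expression projected = Projections.inclusive(spec).project(Expressions.startsWith("s", "abcde"));
    System.out.println(projected);
  }
}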
Use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.
The class TestNotStartsWith, method testTruncateStringWhenProjectedPredicateTermIsShorterThanWidth.
@Test
public void testTruncateStringWhenProjectedPredicateTermIsShorterThanWidth() {
  Truncate<String> trunc = Truncate.get(Types.StringType.get(), 16);
  UnboundPredicate<String> expr = notStartsWith(COLUMN, "ab");
  BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);

  UnboundPredicate<String> projected = trunc.projectStrict(COLUMN, boundExpr);
  Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);

  Assert.assertEquals("The projected literal should not be truncated as its size is shorter than truncation width",
      projected.literal().value(), "ab");
  Assert.assertFalse("notStartsWith(ab, truncate(abcde, 16)) => false",
      evaluator.eval(TestHelpers.Row.of("abcde")));
  Assert.assertFalse("notStartsWith(ab, truncate(ab, 16)) => false",
      evaluator.eval(TestHelpers.Row.of("ab")));
  Assert.assertTrue("notStartsWith(ab, truncate(a, 16)) => true",
      evaluator.eval(TestHelpers.Row.of("a")));
}
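The strict counterpart works because the predicate term "ab" is shorter than the truncation width 16, so truncation cannot turn a non-matching value into a matching one. Roughly the same projection via the public API, again with an illustrative schema:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.types.Types;

public class NotStartsWithProjectionSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Types.NestedField.required(1, "s", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("s", 16).build();

    // "ab" is shorter than width 16, so the projected literal stays "ab": a partition
    // value that does not start with "ab" proves that no row in it starts with "ab"
    Expression projected = Projections.strict(spec).project(Expressions.notStartsWith("s", "ab"));
    System.out.println(projected);
  }
}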
Use of org.apache.iceberg.expressions.Evaluator in project iceberg by apache.
The class ManifestGroup, method entries.
private <T> Iterable<CloseableIterable<T>> entries(
    BiFunction<ManifestFile, CloseableIterable<ManifestEntry<DataFile>>, CloseableIterable<T>> entryFn) {
  LoadingCache<Integer, ManifestEvaluator> evalCache = specsById == null ? null :
      Caffeine.newBuilder().build(specId -> {
        PartitionSpec spec = specsById.get(specId);
        return ManifestEvaluator.forPartitionFilter(
            Expressions.and(partitionFilter, Projections.inclusive(spec, caseSensitive).project(dataFilter)),
            spec, caseSensitive);
      });

  Evaluator evaluator;
  if (fileFilter != null && fileFilter != Expressions.alwaysTrue()) {
    evaluator = new Evaluator(DataFile.getType(EMPTY_STRUCT), fileFilter, caseSensitive);
  } else {
    evaluator = null;
  }

  Iterable<ManifestFile> matchingManifests = evalCache == null ? dataManifests :
      Iterables.filter(dataManifests, manifest -> evalCache.get(manifest.partitionSpecId()).eval(manifest));

  if (ignoreDeleted) {
    // only scan manifests that have entries other than deletes
    // remove any manifests that don't have any existing or added files. if either the added or
    // existing files count is missing, the manifest must be scanned.
    matchingManifests = Iterables.filter(matchingManifests,
        manifest -> manifest.hasAddedFiles() || manifest.hasExistingFiles());
  }

  if (ignoreExisting) {
    // only scan manifests that have entries other than existing
    // remove any manifests that don't have any deleted or added files. if either the added or
    // deleted files count is missing, the manifest must be scanned.
    matchingManifests = Iterables.filter(matchingManifests,
        manifest -> manifest.hasAddedFiles() || manifest.hasDeletedFiles());
  }

  matchingManifests = Iterables.filter(matchingManifests, manifestPredicate::test);

  return Iterables.transform(matchingManifests, manifest -> {
    ManifestReader<DataFile> reader = ManifestFiles.read(manifest, io, specsById)
        .filterRows(dataFilter)
        .filterPartitions(partitionFilter)
        .caseSensitive(caseSensitive)
        .select(columns);

    CloseableIterable<ManifestEntry<DataFile>> entries = reader.entries();
    if (ignoreDeleted) {
      entries = reader.liveEntries();
    }

    if (ignoreExisting) {
      entries = CloseableIterable.filter(entries,
          entry -> entry.status() != ManifestEntry.Status.EXISTING);
    }

    if (evaluator != null) {
      entries = CloseableIterable.filter(entries,
          entry -> evaluator.eval((GenericDataFile) entry.file()));
    }

    entries = CloseableIterable.filter(entries, manifestEntryPredicate);
    return entryFn.apply(manifest, entries);
  });
}
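Like canDeleteUsingMetadata above, this method builds one evaluator per partition spec and memoizes it, since projecting the filter is the costly step and a table may carry several specs. A sketch of that caching pattern with Caffeine, using a hypothetical single-spec map and filter:

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import java.util.Map;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.types.Types;

public class PerSpecEvaluatorCache {
  public static void main(String[] args) {
    Schema schema = new Schema(Types.NestedField.required(1, "category", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("category").build();
    Map<Integer, PartitionSpec> specsById = Map.of(spec.specId(), spec);

    Expression filter = Expressions.equal("category", "books");

    // project the filter once per spec id; later lookups reuse the cached Evaluator
    LoadingCache<Integer, Evaluator> evalCache = Caffeine.newBuilder().build(specId -> {
      PartitionSpec cachedSpec = specsById.get(specId);
      return new Evaluator(
          cachedSpec.partitionType(),
          Projections.inclusive(cachedSpec).project(filter));
    });

    Evaluator eval = evalCache.get(spec.specId());  // built on first access, cached after
    System.out.println(eval != null);
  }
}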