Use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
Class QueryGenerator, method getPathsAndFiltersForGetElements.
private ParquetQuery getPathsAndFiltersForGetElements(final GetElements getElements) throws SerialisationException, OperationException {
final Iterable<? extends ElementId> seeds = getElements.getInput();
if (null == seeds || !seeds.iterator().hasNext()) {
return new ParquetQuery();
}
// Stage 1: Use the view to identify all groups that might contain data
final Set<String> allRelevantGroups = getRelevantGroups(getElements.getView());
// Stage 2: For each of the above groups, create a Parquet predicate from the view and directedType
final Map<String, Pair<FilterPredicate, Boolean>> groupToPredicate = new HashMap<>();
for (final String group : allRelevantGroups) {
Pair<FilterPredicate, Boolean> filter = getPredicateFromView(getElements.getView(), group, schemaUtils.getEntityGroups().contains(group));
if (schemaUtils.getEdgeGroups().contains(group)) {
final FilterPredicate directedTypeFilter = getPredicateFromDirectedType(getElements.getDirectedType());
filter.setFirst(FilterPredicateUtils.and(filter.getFirst(), directedTypeFilter));
}
groupToPredicate.put(group, filter);
}
// Stage 3: Convert seeds to ParquetElementSeeds and create Stream of <group, ParquetElementSeed> pairs where
// each seed appears once for each of the relevant groups
final Stream<Pair<String, ParquetElementSeed>> groupAndSeeds = StreamSupport.stream(seeds.spliterator(), false).flatMap(seed -> {
try {
return seedToParquetObject(seed, allRelevantGroups).stream();
} catch (final SerialisationException e) {
throw new RuntimeException("SerialisationException converting seed into a Parquet object", e);
}
});
// Stage 4: Convert stream of <group, ParquetElementSeed> pairs to stream of tuples
// <group, ParquetElementSeed, Set<PathInfo>>
final Stream<Tuple3<String, ParquetElementSeed, Set<PathInfo>>> groupSeedsAndPaths = groupAndSeeds.map(pair -> getRelevantFiles(pair.getFirst(), pair.getSecond()));
// Stage 5: Create map from path to list of <group, reversed edge flag, Parquet seeds>
// TODO: Currently this consumes the entire stream - need to do this in batches
final List<Tuple3<String, ParquetElementSeed, Set<PathInfo>>> groupSeedsAndPathsList = groupSeedsAndPaths.collect(Collectors.toList());
final Map<PathInfo, List<Tuple3<String, Boolean, ParquetElementSeed>>> pathToSeeds = new HashMap<>();
for (final Tuple3<String, ParquetElementSeed, Set<PathInfo>> tuple : groupSeedsAndPathsList) {
Set<PathInfo> paths = tuple.get2();
for (final PathInfo pathInfo : paths) {
if (!pathToSeeds.containsKey(pathInfo)) {
pathToSeeds.put(pathInfo, new ArrayList<>());
}
pathToSeeds.get(pathInfo).add(new Tuple3<>(tuple.get0(), pathInfo.isReversed(), tuple.get1()));
}
}
// Stage 6: Create ParquetQuery
final SeededGraphFilters.IncludeIncomingOutgoingType includeIncomingOutgoingType = getElements.getIncludeIncomingOutGoing();
final SeedMatching.SeedMatchingType seedMatchingType = getElements.getSeedMatching();
final ParquetQuery parquetQuery = new ParquetQuery();
for (final PathInfo pathInfo : pathToSeeds.keySet()) {
List<Tuple3<String, Boolean, ParquetElementSeed>> seedList = pathToSeeds.get(pathInfo);
FilterPredicate filterPredicate = seedsToPredicate(seedList, includeIncomingOutgoingType, seedMatchingType);
if (null != filterPredicate) {
final String group = pathInfo.getGroup();
final Pair<FilterPredicate, Boolean> viewFilterPredicate = groupToPredicate.get(group);
if (null != viewFilterPredicate) {
// Put the view predicate first, as the filter that checks whether an element matches one of many seeds could be complex
filterPredicate = FilterPredicateUtils.and(viewFilterPredicate.getFirst(), filterPredicate);
}
final ParquetFileQuery fileQuery = new ParquetFileQuery(pathInfo.getPath(), filterPredicate, viewFilterPredicate.getSecond());
parquetQuery.add(group, fileQuery);
}
}
LOGGER.info("Created ParquetQuery of {}", parquetQuery);
return parquetQuery;
}
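The method above repeatedly combines an optional view predicate with other predicates via Gaffer's FilterPredicateUtils.and helper. As a rough illustration of that pattern (an assumption about the helper's behaviour, not its actual source), a null-safe conjunction over Parquet FilterPredicates can be sketched as follows, where a null argument is treated as "no filter":

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;

public final class NullSafeAnd {

    private NullSafeAnd() {
    }

    // Conjunction of two predicates, treating null as "no filter".
    // Sketch only - Gaffer's FilterPredicateUtils.and may behave differently.
    public static FilterPredicate and(final FilterPredicate first, final FilterPredicate second) {
        if (null == first) {
            return second;
        }
        if (null == second) {
            return first;
        }
        return FilterApi.and(first, second);
    }
}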
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
Class JavaPredicateToParquetPredicate, method getAgeOffPredicate.
public FilterPredicate getAgeOffPredicate(final AgeOff ageOff, final String[] selection, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, selection[0]);
if (paths == null) {
paths = new String[1];
paths[0] = selection[0];
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
Long ageOffTime = System.currentTimeMillis() - ageOff.getAgeOffTime();
tempFilter = gt(longColumn(path), ageOffTime);
if (filter == null) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
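The age-off predicate keeps only rows whose timestamp column is greater than the current time minus the age-off window. A minimal sketch of pushing such a predicate into a Hadoop-based Parquet read is shown below; the column name "timestamp" and the 24-hour window are illustrative assumptions, not values taken from a Gaffer schema:

import static org.apache.parquet.filter2.predicate.FilterApi.gt;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetInputFormat;

public class AgeOffPushdownExample {
    public static void main(final String[] args) {
        // Assumed age-off window of 24 hours.
        final long ageOffMillis = TimeUnit.HOURS.toMillis(24);
        // Keep rows whose "timestamp" column is newer than now minus the window.
        final FilterPredicate ageOff =
                gt(longColumn("timestamp"), System.currentTimeMillis() - ageOffMillis);

        final Configuration conf = new Configuration();
        // Row groups whose statistics prove no row can match are skipped entirely.
        ParquetInputFormat.setFilterPredicate(conf, ageOff);
    }
}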
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
Class JavaPredicateToParquetPredicate, method getParquetPredicate.
public FilterPredicate getParquetPredicate() throws SerialisationException {
FilterPredicate filterResult;
if (javaPredicate instanceof AgeOff) {
filterResult = getAgeOffPredicate((AgeOff) javaPredicate, selection, group, schemaUtils);
} else if (javaPredicate instanceof And) {
final And and = (And) javaPredicate;
filterResult = getAndFilter((List<Predicate>) and.getComponents(), selection, group, schemaUtils);
} else if (javaPredicate instanceof Or) {
final Or or = (Or) javaPredicate;
filterResult = getOrFilter((List<Predicate>) or.getComponents(), selection, group, schemaUtils);
} else if (javaPredicate instanceof Not) {
final Not not = (Not) javaPredicate;
final JavaPredicateToParquetPredicate predicateConverter = new JavaPredicateToParquetPredicate(schemaUtils, not.getPredicate(), selection, group);
final FilterPredicate parquetPredicate = predicateConverter.getParquetPredicate();
if (!predicateConverter.fullyApplied) {
fullyApplied = false;
}
filterResult = FilterPredicateUtils.not(parquetPredicate);
} else {
filterResult = getPrimitiveFilter(javaPredicate, selection[0], group, schemaUtils);
}
return filterResult;
}
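getParquetPredicate recurses on composite Gaffer predicates (And, Or, Not) and only converts leaf predicates directly. A hand-built sketch of the kind of Parquet predicate this produces for a conjunction such as "count > 10 AND count < 100" on a long property is shown below; the column name "count" is an illustrative assumption:

import static org.apache.parquet.filter2.predicate.FilterApi.and;
import static org.apache.parquet.filter2.predicate.FilterApi.gt;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.not;

import org.apache.parquet.filter2.predicate.FilterPredicate;

public class CompositePredicateExample {
    public static void main(final String[] args) {
        // Equivalent of And(IsMoreThan(10), IsLessThan(100)) on a long property.
        final FilterPredicate range =
                and(gt(longColumn("count"), 10L), lt(longColumn("count"), 100L));
        // A Not simply wraps the converted child, mirroring the Not branch above.
        final FilterPredicate outsideRange = not(range);
        System.out.println(range);
        System.out.println(outsideRange);
    }
}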
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
Class JavaPredicateToParquetPredicate, method getIsLessThanFilter.
private FilterPredicate getIsLessThanFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
String[] paths = schemaUtils.getPaths(group, colName);
if (null == paths) {
paths = new String[1];
paths[0] = colName;
}
FilterPredicate filter = null;
for (int i = 0; i < paths.length; i++) {
final String path = paths[i];
FilterPredicate tempFilter;
if (parquetObjects[i] instanceof String) {
tempFilter = lt(binaryColumn(path), Binary.fromString((String) parquetObjects[i]));
} else if (parquetObjects[i] instanceof Double) {
tempFilter = lt(doubleColumn(path), (Double) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Float) {
tempFilter = lt(floatColumn(path), (Float) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Integer) {
tempFilter = lt(intColumn(path), (Integer) parquetObjects[i]);
} else if (parquetObjects[i] instanceof Long) {
tempFilter = lt(longColumn(path), (Long) parquetObjects[i]);
} else if (parquetObjects[i] instanceof java.util.Date) {
tempFilter = lt(longColumn(path), ((java.util.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof java.sql.Date) {
tempFilter = lt(longColumn(path), ((java.sql.Date) parquetObjects[i]).getTime());
} else if (parquetObjects[i] instanceof Short) {
tempFilter = lt(intColumn(path), ((Short) parquetObjects[i]).intValue());
} else if (parquetObjects[i] instanceof byte[]) {
tempFilter = lt(binaryColumn(path), Binary.fromReusedByteArray((byte[]) parquetObjects[i]));
} else {
fullyApplied = false;
LOGGER.warn(parquetObjects[i].getClass().getCanonicalName() + " is not a natively supported type for the IsLessThan filter, so this filter will be applied after the data has been read, which will take longer.");
return null;
}
if (null == filter) {
filter = tempFilter;
} else {
filter = and(filter, tempFilter);
}
}
return filter;
}
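getIsLessThanFilter dispatches on the runtime type of the serialised value: Strings and byte arrays are compared as binary columns, Dates as long columns holding epoch milliseconds, and Shorts are widened to ints. A minimal sketch of two of those mappings built directly with FilterApi is shown below; the column names "city" and "eventTime" are illustrative assumptions:

import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;

import java.util.Date;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.io.api.Binary;

public class IsLessThanExample {
    public static void main(final String[] args) {
        // String value: compared as a binary column.
        final FilterPredicate stringLt =
                lt(binaryColumn("city"), Binary.fromString("London"));
        // java.util.Date value: compared as a long column holding epoch millis.
        final FilterPredicate dateLt =
                lt(longColumn("eventTime"), new Date().getTime());
        System.out.println(stringLt);
        System.out.println(dateLt);
    }
}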
Use of org.apache.parquet.filter2.predicate.FilterPredicate in project hive by apache.
Class FilterPredicateLeafBuilder, method buildPredicate.
/**
 * Builds a filter predicate from multiple constant values.
 *
 * @param op IN or BETWEEN
 * @param literals the constant values to compare the column against
 * @param columnName the name of the column the predicate applies to
 * @param columnType the Hive type of the column
 * @return the constructed Parquet FilterPredicate
 */
public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List<Object> literals, String columnName, TypeInfo columnType) throws Exception {
FilterPredicate result = null;
switch(op) {
case IN:
for (Object literal : literals) {
if (result == null) {
result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName, columnType);
} else {
result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName, columnType));
}
}
return result;
case BETWEEN:
if (literals.size() != 2) {
throw new RuntimeException("Not able to build 'between' operation filter with " + literals + " which needs two literals");
}
Object min = literals.get(0);
Object max = literals.get(1);
FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN, min, columnName, columnType));
FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, max, columnName, columnType);
result = FilterApi.and(gt, lt);
return result;
default:
throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op);
}
}
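The BETWEEN branch expresses "min <= column <= max" using only the LESS_THAN and LESS_THAN_EQUALS leaves available to the builder: NOT(column < min) supplies the lower bound, (column <= max) supplies the upper bound, and the two are combined with FilterApi.and. A minimal sketch of the same construction written directly against FilterApi is shown below; the long column name "price" and the bounds are illustrative assumptions:

import static org.apache.parquet.filter2.predicate.FilterApi.and;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.not;

import org.apache.parquet.filter2.predicate.FilterPredicate;

public class BetweenPredicateExample {
    public static void main(final String[] args) {
        final long min = 10L;
        final long max = 20L;
        // NOT(price < min) is equivalent to price >= min.
        final FilterPredicate atLeastMin = not(lt(longColumn("price"), min));
        final FilterPredicate atMostMax = ltEq(longColumn("price"), max);
        // price BETWEEN min AND max.
        final FilterPredicate between = and(atMostMax, atLeastMin);
        System.out.println(between);
    }
}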