use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
the class QueryGenerator method seedToPredicate.
/**
 * Builds the Parquet filter that selects, within {@code group}, the rows matching a single seed.
 * <ul>
 * <li>Entity group, entity seed: the VERTEX column must equal the seed.</li>
 * <li>Entity group, edge seed: unless the seed matching type is EQUAL, the VERTEX column must equal the
 * edge seed's source or, when the destination is non-null, its destination.</li>
 * <li>Edge group, entity seed: unless the seed matching type is EQUAL, the seed is matched against the
 * DESTINATION column when {@code reversed} is true and the SOURCE column otherwise; for INCOMING with
 * {@code reversed} false and for OUTGOING with {@code reversed} true the DIRECTED column must also be
 * false.</li>
 * <li>Edge group, edge seed: source and destination are matched against the SOURCE and DESTINATION columns
 * (swapped when {@code reversed} is true), plus a DIRECTED constraint when the seed's directed type is
 * DIRECTED or UNDIRECTED.</li>
 * </ul>
 *
 * @param seed                        the seed to convert
 * @param includeIncomingOutgoingType which edge directions relative to an entity seed are wanted
 * @param seedMatchingType            EQUAL restricts matches to groups of the same kind as the seed
 * @param group                       the group the filter will be applied to
 * @param reversed                    whether the seed is matched against the swapped SOURCE/DESTINATION columns
 * @return the filter, or {@code null} when none of the cases above produced one
 */
private FilterPredicate seedToPredicate(final ParquetElementSeed seed, final SeededGraphFilters.IncludeIncomingOutgoingType includeIncomingOutgoingType, final SeedMatching.SeedMatchingType seedMatchingType, final String group, final boolean reversed) {
    final boolean entityGroup = schemaUtils.getEntityGroups().contains(group);
    final boolean entitySeed = seed.getElementId() instanceof EntityId;
    final boolean matchAcrossKinds = seedMatchingType != SeedMatching.SeedMatchingType.EQUAL;
    FilterPredicate filter = null;

    if (entityGroup && entitySeed) {
        // Entity seed against an entity group: plain vertex equality.
        filter = getIsEqualFilter(ParquetStore.VERTEX, ((ParquetEntitySeed) seed).getSeed(), group);
    } else if (entityGroup) {
        // Edge seed against an entity group: only meaningful when the seed kind need not match the group kind.
        final ParquetEdgeSeed edgeSeed = (ParquetEdgeSeed) seed;
        if (matchAcrossKinds) {
            // Vertex = source of edge seed, or vertex = destination of edge seed.
            filter = getIsEqualFilter(ParquetStore.VERTEX, edgeSeed.getSource(), group);
            if (null != edgeSeed.getDestination()) {
                filter = FilterPredicateUtils.or(filter, getIsEqualFilter(ParquetStore.VERTEX, edgeSeed.getDestination(), group));
            }
        }
    } else if (entitySeed) {
        // Entity seed against an edge group: with EQUAL matching nothing can be found in an edge group.
        if (matchAcrossKinds) {
            final boolean incoming = includeIncomingOutgoingType == SeededGraphFilters.IncludeIncomingOutgoingType.INCOMING;
            final boolean outgoing = includeIncomingOutgoingType == SeededGraphFilters.IncludeIncomingOutgoingType.OUTGOING;
            // The seed is looked up in the destination column when reversed, in the source column otherwise.
            final String seedColumn = reversed ? ParquetStore.DESTINATION : ParquetStore.SOURCE;
            filter = getIsEqualFilter(seedColumn, ((ParquetEntitySeed) seed).getSeed(), group);
            // INCOMING via the source column, or OUTGOING via the destination column, only keeps undirected edges.
            final boolean undirectedOnly = (incoming && !reversed) || (outgoing && reversed);
            if (undirectedOnly) {
                filter = FilterPredicateUtils.and(filter, getIsEqualFilter(ParquetStore.DIRECTED, new Object[] {false}, group));
            }
        }
    } else {
        // Edge seed against an edge group.
        // TODO Optimise this - there are times the reversed lookup is unnecessary
        final ParquetEdgeSeed edgeSeed = (ParquetEdgeSeed) seed;
        final String columnForSeedSource = reversed ? ParquetStore.DESTINATION : ParquetStore.SOURCE;
        final String columnForSeedDestination = reversed ? ParquetStore.SOURCE : ParquetStore.DESTINATION;
        filter = FilterPredicateUtils.and(
                getIsEqualFilter(columnForSeedSource, edgeSeed.getSource(), group),
                getIsEqualFilter(columnForSeedDestination, edgeSeed.getDestination(), group));
        final DirectedType directedType = edgeSeed.getDirectedType();
        if (directedType == DirectedType.DIRECTED || directedType == DirectedType.UNDIRECTED) {
            // Any other directed type adds no constraint on the DIRECTED column.
            final boolean directed = directedType == DirectedType.DIRECTED;
            filter = FilterPredicateUtils.and(filter, getIsEqualFilter(ParquetStore.DIRECTED, new Object[] {directed}, group));
        }
    }

    LOGGER.debug("Returning {} from seedToPredicate", filter);
    return filter;
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
the class QueryGenerator method getPathsAndFiltersForAllElements.
/**
 * Builds the {@code ParquetQuery} for a {@code GetAllElements} operation.
 * <p>
 * Every group returned by {@code getRelevantGroups} for the operation's view is mapped to the files the store
 * holds for it. For each such group a predicate is derived from the view; for edge groups it is combined with
 * the predicate derived from the operation's directed type. Finally one {@code ParquetFileQuery} per file is
 * added to the query, carrying the group's predicate when there is one and a {@code null} filter otherwise.
 *
 * @param getAllElements the operation to translate
 * @return the query covering every file of every relevant group
 * @throws IOException        declared by this method; presumably raised while listing files - confirm
 * @throws OperationException declared by this method; presumably raised while building predicates - confirm
 */
private ParquetQuery getPathsAndFiltersForAllElements(final GetAllElements getAllElements) throws IOException, OperationException {
    // Step 1 + 2: groups selected by the view, each mapped to the files holding its data.
    final Map<String, List<Path>> filesByGroup = new HashMap<>();
    for (final String group : getRelevantGroups(getAllElements.getView())) {
        filesByGroup.put(group, store.getFilesForGroup(group));
    }

    // Step 3: per group, the predicate derived from the view and (edge groups only) the directed type.
    final Map<String, Pair<FilterPredicate, Boolean>> predicateByGroup = new HashMap<>();
    for (final String group : filesByGroup.keySet()) {
        Pair<FilterPredicate, Boolean> predicate = getPredicateFromView(getAllElements.getView(), group, schemaUtils.getEntityGroups().contains(group));
        if (schemaUtils.getEdgeGroups().contains(group)) {
            final FilterPredicate directedTypeFilter = getPredicateFromDirectedType(getAllElements.getDirectedType());
            if (null == predicate) {
                predicate = new Pair<>(directedTypeFilter, false);
            } else {
                predicate.setFirst(FilterPredicateUtils.and(predicate.getFirst(), directedTypeFilter));
            }
        }
        if (null != predicate) {
            predicateByGroup.put(group, predicate);
        }
    }

    // Step 4: one file query per file, using the group's predicate when one exists.
    final ParquetQuery parquetQuery = new ParquetQuery();
    for (final Map.Entry<String, List<Path>> groupFiles : filesByGroup.entrySet()) {
        final String group = groupFiles.getKey();
        // Only non-null pairs are ever stored, so a null lookup means "no predicate for this group".
        final Pair<FilterPredicate, Boolean> predicate = predicateByGroup.get(group);
        for (final Path path : groupFiles.getValue()) {
            final ParquetFileQuery fileQuery;
            if (null == predicate) {
                fileQuery = new ParquetFileQuery(path, null, false);
            } else {
                fileQuery = new ParquetFileQuery(path, predicate.getFirst(), predicate.getSecond());
            }
            parquetQuery.add(group, fileQuery);
        }
    }
    LOGGER.info("Created ParquetQuery of {}", parquetQuery);
    return parquetQuery;
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsEqualFilter.
/**
 * Builds a Parquet equality filter for a column of the given group.
 * <p>
 * The column is resolved to its Parquet paths via {@code schemaUtils.getPaths(group, colName)}; when that
 * returns {@code null} the column name itself is used as the single path. The i-th path is compared with the
 * i-th entry of {@code parquetObjects}, and the per-path filters are combined with AND.
 * <p>
 * Supported value types are String, Boolean, Double, Float, Integer, Long, Short (compared as int),
 * {@code java.util.Date} and its subclasses such as {@code java.sql.Date} (compared by epoch milliseconds)
 * and {@code byte[]}. For any other value, including {@code null}, {@code fullyApplied} is set to
 * {@code false}, a warning is logged and {@code null} is returned.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the values to compare with, one per Parquet path of the column
 * @param group          the group the column belongs to
 * @param schemaUtils    used to resolve the column to its Parquet paths
 * @return the combined equality filter, or {@code null} if a value has an unsupported type or there are no paths
 */
public FilterPredicate getIsEqualFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = eq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Boolean) {
            tempFilter = eq(booleanColumn(path), (Boolean) value);
        } else if (value instanceof Double) {
            tempFilter = eq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = eq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = eq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = eq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so it is handled here too; a separate branch would be unreachable.
            tempFilter = eq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            tempFilter = eq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = eq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // Unsupported (or null) value: give up on pushing this filter down and record that fact.
            fullyApplied = false;
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn("{} is not a natively supported type for the IsEqual filter, therefore execution will take longer to perform this filter.", typeName);
            return null;
        }
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getIsLessThanOrEqualToFilter.
/**
 * Builds a Parquet "less than or equal to" filter for a column of the given group.
 * <p>
 * The column is resolved to its Parquet paths via {@code schemaUtils.getPaths(group, colName)}; when that
 * returns {@code null} the column name itself is used as the single path. The i-th path is compared with the
 * i-th entry of {@code parquetObjects}, and the per-path filters are combined with AND.
 * <p>
 * Supported value types are String, Double, Float, Integer, Long, Short (compared as int),
 * {@code java.util.Date} and its subclasses such as {@code java.sql.Date} (compared by epoch milliseconds)
 * and {@code byte[]}. For any other value, including {@code null}, {@code fullyApplied} is set to
 * {@code false}, a warning is logged and {@code null} is returned.
 *
 * @param colName        the column to filter on
 * @param parquetObjects the upper bounds, one per Parquet path of the column
 * @param group          the group the column belongs to
 * @param schemaUtils    used to resolve the column to its Parquet paths
 * @return the combined filter, or {@code null} if a value has an unsupported type or there are no paths
 */
private FilterPredicate getIsLessThanOrEqualToFilter(final String colName, final Object[] parquetObjects, final String group, final SchemaUtils schemaUtils) {
    String[] paths = schemaUtils.getPaths(group, colName);
    if (null == paths) {
        paths = new String[] {colName};
    }
    FilterPredicate filter = null;
    for (int i = 0; i < paths.length; i++) {
        final String path = paths[i];
        final Object value = parquetObjects[i];
        final FilterPredicate tempFilter;
        if (value instanceof String) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromString((String) value));
        } else if (value instanceof Double) {
            tempFilter = ltEq(doubleColumn(path), (Double) value);
        } else if (value instanceof Float) {
            tempFilter = ltEq(floatColumn(path), (Float) value);
        } else if (value instanceof Integer) {
            tempFilter = ltEq(intColumn(path), (Integer) value);
        } else if (value instanceof Long) {
            tempFilter = ltEq(longColumn(path), (Long) value);
        } else if (value instanceof java.util.Date) {
            // java.sql.Date extends java.util.Date, so it is handled here too; a separate branch would be unreachable.
            tempFilter = ltEq(longColumn(path), ((java.util.Date) value).getTime());
        } else if (value instanceof Short) {
            tempFilter = ltEq(intColumn(path), ((Short) value).intValue());
        } else if (value instanceof byte[]) {
            tempFilter = ltEq(binaryColumn(path), Binary.fromReusedByteArray((byte[]) value));
        } else {
            // Unsupported (or null) value: give up on pushing this filter down and record that fact.
            fullyApplied = false;
            final String typeName = null == value ? "null" : value.getClass().getCanonicalName();
            LOGGER.warn("{} is not a natively supported type for the IsLessThanOrEqualTo filter, therefore execution will take longer to perform this filter.", typeName);
            return null;
        }
        filter = null == filter ? tempFilter : and(filter, tempFilter);
    }
    return filter;
}
use of org.apache.parquet.filter2.predicate.FilterPredicate in project Gaffer by gchq.
the class JavaPredicateToParquetPredicate method getOrFilter.
/**
 * Converts each predicate of an OR into a Parquet predicate and combines the results with
 * {@code FilterPredicateUtils.or}.
 * <p>
 * A {@code TupleAdaptedPredicate} is unwrapped: its inner predicate is converted, and its Integer selection
 * is used as indices into {@code selection} to build the selection handed to the converter. Any other
 * predicate is converted as-is with the full {@code selection}. If any converter reports that its predicate
 * was not fully applied, this instance's {@code fullyApplied} flag is cleared as well.
 *
 * @param predicateList the alternatives of the OR
 * @param selection     the column selection the alternatives refer to
 * @param group         the group being filtered
 * @param schemaUtils   passed on to the per-predicate converters
 * @return the combined filter; {@code null} when {@code predicateList} is empty
 * @throws SerialisationException declared by this method; presumably raised during conversion - confirm
 */
public FilterPredicate getOrFilter(final List<Predicate> predicateList, final String[] selection, final String group, final SchemaUtils schemaUtils) throws SerialisationException {
    FilterPredicate disjunction = null;
    for (final Predicate alternative : predicateList) {
        final Predicate innerPredicate;
        final String[] innerSelection;
        if (!(alternative instanceof TupleAdaptedPredicate)) {
            innerPredicate = alternative;
            innerSelection = selection;
        } else {
            final TupleAdaptedPredicate adapted = (TupleAdaptedPredicate) alternative;
            innerPredicate = adapted.getPredicate();
            // Translate the adapter's positional selection into the corresponding column names.
            final Integer[] positions = (Integer[]) adapted.getSelection();
            innerSelection = new String[positions.length];
            for (int pos = 0; pos < positions.length; pos++) {
                innerSelection[pos] = selection[positions[pos]];
            }
        }
        final JavaPredicateToParquetPredicate converter = new JavaPredicateToParquetPredicate(schemaUtils, innerPredicate, innerSelection, group);
        final FilterPredicate converted = converter.getParquetPredicate();
        if (!converter.fullyApplied) {
            fullyApplied = false;
        }
        // NOTE(review): the first iteration relies on or(null, x) yielding x; if or(x, null) likewise yields x,
        // an alternative that could not be converted is dropped from the OR - confirm this is intended.
        disjunction = FilterPredicateUtils.or(disjunction, converted);
    }
    return disjunction;
}
Aggregations