use of org.apache.druid.data.input.StringTuple in project druid by druid-io.
the class DimensionRangeShardSpecTest method testPossibleInDomain_nonNullValues_pruningScenarios.
/**
 * Exercises {@code possibleInDomain} on a shard whose start and end tuples are both non-null
 * and share the same first dimension value. Every query domain below is expected to be pruned.
 */
@Test
public void testPossibleInDomain_nonNullValues_pruningScenarios() {
  setDimensions("planet", "country", "city");

  final StringTuple lowerBound = StringTuple.create("Earth", "France", "Paris");
  final StringTuple upperBound = StringTuple.create("Earth", "USA", "New York");

  final RangeSet<String> everything = TreeRangeSet.create();
  everything.add(Range.all());

  final ShardSpec shardSpec = new DimensionRangeShardSpec(dimensions, lowerBound, upperBound, 0, null);
  final Map<String, RangeSet<String>> queryDomain = new HashMap<>();

  // Domain: ((-INF, Earth) U (Earth, INF)) * (-INF, INF) * (-INF, INF)
  // The planet dimension excludes "Earth", the only planet inside the shard,
  // so EffectiveDomain[:1] == {} -> PRUNE
  final RangeSet<String> anyPlanetButEarth = getUnion(
      getRangeSet(Range.lessThan("Earth")),
      getRangeSet(Range.greaterThan("Earth"))
  );
  populateDomain(queryDomain, anyPlanetButEarth, everything, everything);
  assertFalse(shardSpec.possibleInDomain(queryDomain));

  // Domain: (-INF, INF) * (-INF, "France") * (-INF, INF)
  // EffectiveDomain[:1] == {Earth} == {start[:1]} == {end[:1]}
  // Countries strictly before "France" fall below start[:2], so EffectiveDomain[:2] == {} -> PRUNE
  final RangeSet<String> countriesBeforeFrance = getRangeSet(Range.lessThan("France"));
  populateDomain(queryDomain, everything, countriesBeforeFrance, everything);
  assertFalse(shardSpec.possibleInDomain(queryDomain));

  // Domain: (-INF, INF) * (-INF, "France"] * (-INF, "Paris")
  // EffectiveDomain[:1] == {Earth} == {start[:1]} == {end[:1]}
  // EffectiveDomain[:2] == {<Earth, France>} == {start[:2]}
  // Cities strictly before "Paris" fall below start[:3], so EffectiveDomain[:3] == {} -> PRUNE
  final RangeSet<String> countriesUpToFrance = getRangeSet(Range.atMost("France"));
  final RangeSet<String> citiesBeforeParis = getRangeSet(Range.lessThan("Paris"));
  populateDomain(queryDomain, everything, countriesUpToFrance, citiesBeforeParis);
  assertFalse(shardSpec.possibleInDomain(queryDomain));

  // Domain: {Earth} * {USA} * ("New York", INF)
  // EffectiveDomain[:1] == {Earth} == {start[:1]} == {end[:1]}
  // EffectiveDomain[:2] == {<Earth, USA>} == {end[:2]}
  // Cities strictly after "New York" lie above end[:3], so EffectiveDomain[:3] == {} -> PRUNE
  final RangeSet<String> onlyEarth = getRangeSet(Range.singleton("Earth"));
  final RangeSet<String> onlyUsa = getRangeSet(Range.singleton("USA"));
  final RangeSet<String> citiesAfterNewYork = getRangeSet(Range.greaterThan("New York"));
  populateDomain(queryDomain, onlyEarth, onlyUsa, citiesAfterNewYork);
  assertFalse(shardSpec.possibleInDomain(queryDomain));
}
use of org.apache.druid.data.input.StringTuple in project druid by druid-io.
the class DimensionRangeShardSpec method isInChunk.
/**
 * Decides whether an input row belongs to the chunk delimited by {@code start} and {@code end}.
 *
 * The row is reduced to a tuple holding one value per partition dimension. A dimension
 * contributes its value only when the row has exactly one value for it; a missing, empty or
 * multi-valued dimension contributes {@code null}. The row is in the chunk when its tuple
 * compares greater than or equal to {@code start} and strictly less than {@code end}; a
 * {@code null} bound places no restriction on that side.
 *
 * @param dimensions partition dimensions, in tuple order
 * @param start      inclusive lower bound of the chunk, or null if unbounded
 * @param end        exclusive upper bound of the chunk, or null if unbounded
 * @param inputRow   row to test
 * @return true if the row's tuple lies within the chunk
 */
public static boolean isInChunk(List<String> dimensions, @Nullable StringTuple start, @Nullable StringTuple end, InputRow inputRow) {
  final String[] rowValues = new String[dimensions.size()];
  for (int dim = 0; dim < dimensions.size(); ++dim) {
    final List<String> dimValues = inputRow.getDimension(dimensions.get(dim));
    // Only single-valued dimensions contribute a value; everything else stays null
    if (dimValues != null && dimValues.size() == 1) {
      rowValues[dim] = dimValues.get(0);
    } else {
      rowValues[dim] = null;
    }
  }

  final StringTuple rowTuple = StringTuple.create(rowValues);
  final int comparedToStart = rowTuple.compareTo(start);
  final int comparedToEnd = rowTuple.compareTo(end);

  final boolean atOrAfterStart = comparedToStart >= 0 || start == null;
  final boolean beforeEnd = comparedToEnd < 0 || end == null;
  return atOrAfterStart && beforeEnd;
}
use of org.apache.druid.data.input.StringTuple in project druid by druid-io.
the class DimensionRangeShardSpecTest method testPossibleInDomain_withNullStart.
/**
 * Exercises {@code possibleInDomain} on a shard that has no start tuple, so only the end
 * tuple can restrict which query domains overlap the shard.
 */
@Test
public void testPossibleInDomain_withNullStart() {
  setDimensions("planet", "country", "city");

  // A null start is considered to be (-INF, -INF, -INF)
  final StringTuple unboundedStart = null;
  final StringTuple upperBound = StringTuple.create("Saturn", "Foo", "Bar");

  final RangeSet<String> everything = TreeRangeSet.create();
  everything.add(Range.all());

  final ShardSpec shardSpec = new DimensionRangeShardSpec(dimensions, unboundedStart, upperBound, 0, null);
  final Map<String, RangeSet<String>> queryDomain = new HashMap<>();

  // Domain: {Mars} * {Zoo, Zuu} * {Blah, Random}
  // "Mars" sorts before end[:1] == "Saturn" and there is no lower bound,
  // so EffectiveDomain[:1] is non-empty and is not {end[:1]} -> ACCEPT
  final RangeSet<String> onlyMars = getRangeSet(Range.singleton("Mars"));
  final RangeSet<String> zooOrZuu = getUnion(
      getRangeSet(Range.singleton("Zoo")),
      getRangeSet(Range.singleton("Zuu"))
  );
  final RangeSet<String> blahOrRandom = getUnion(
      getRangeSet(Range.singleton("Blah")),
      getRangeSet(Range.singleton("Random"))
  );
  populateDomain(queryDomain, onlyMars, zooOrZuu, blahOrRandom);
  assertTrue(shardSpec.possibleInDomain(queryDomain));

  // Domain: {Saturn} * (-INF, INF) * (-INF, INF)
  // EffectiveDomain[:1] == {end[:1]}
  // EffectiveDomain[:2] covers every country up to "Foo", i.e. more than {end[:2]} -> ACCEPT
  populateDomain(queryDomain, getRangeSet(Range.singleton("Saturn")), everything, everything);
  assertTrue(shardSpec.possibleInDomain(queryDomain));

  // Domain: {Saturn} * {Zoo} * (-INF, INF)
  // EffectiveDomain[:1] == {end[:1]}
  // "Zoo" sorts after end country "Foo", so EffectiveDomain[:2] == {} -> PRUNE
  final RangeSet<String> onlySaturn = getRangeSet(Range.singleton("Saturn"));
  final RangeSet<String> onlyZoo = getRangeSet(Range.singleton("Zoo"));
  populateDomain(queryDomain, onlySaturn, onlyZoo, everything);
  assertFalse(shardSpec.possibleInDomain(queryDomain));

  // Domain: {Xeon} * (-INF, INF) * (-INF, INF)
  // "Xeon" sorts after end planet "Saturn", so EffectiveDomain[:1] == {} -> PRUNE
  populateDomain(queryDomain, getRangeSet(Range.singleton("Xeon")), everything, everything);
  assertFalse(shardSpec.possibleInDomain(queryDomain));
}
use of org.apache.druid.data.input.StringTuple in project druid by druid-io.
the class DimensionRangeShardSpec method possibleInDomain.
/**
 * Decides whether this segment may contain rows matching the given query domain.
 *
 * Notation:
 *   Set[:i] is the cartesian product of Set[0], ..., Set[i - 1]
 *   EffectiveDomain[:i] is defined as QueryDomain[:i] INTERSECTION SegmentRange[:i]
 *
 * Algorithm:
 *   i = 1
 *   if EffectiveDomain[:i] == {start[:i]} || EffectiveDomain[:i] == {end[:i]}:
 *     if i == dimensions.size:
 *       ACCEPT segment
 *     else:
 *       REPEAT with i = i + 1
 *   else if EffectiveDomain[:i] == {}:
 *     PRUNE segment
 *   else:
 *     ACCEPT segment
 *
 * Examples: index on (Hour, Minute, Second), so dimensions.size is 3.
 *
 * I)
 *   start = (3, 25, 10)
 *   end = (5, 10, 30)
 *   query domain = {3} * [0, 10] * {10, 20, 30, 40}
 *   EffectiveDomain[:1] == {3} == {start[:1]}
 *   EffectiveDomain[:2] == {3} * ([0, 10] INTERSECTION [25, INF))
 *                       == {} -> PRUNE
 *
 * II)
 *   start = (3, 25, 10)
 *   end = (5, 15, 30)
 *   query domain = {4} * [0, 10] * {10, 20, 30, 40}
 *   EffectiveDomain[:1] == {4} (!= {} && != {start[:1]} && != {end[:1]}) -> ACCEPT
 *
 * III)
 *   start = (3, 25, 10)
 *   end = (5, 15, 30)
 *   query domain = {5} * [0, 10] * {10, 20, 30, 40}
 *   EffectiveDomain[:1] == {5} == {end[:1]}
 *   EffectiveDomain[:2] == {5} * ([0, 10] INTERSECTION (-INF, 15])
 *                       == {5} * [0, 10] (!= {} && != {end[:2]}) -> ACCEPT
 *
 * IV)
 *   start = (3, 25, 10)
 *   end = (5, 15, 30)
 *   query domain = {5} * [15, 40] * {10, 20, 30, 40}
 *   EffectiveDomain[:1] == {5} == {end[:1]}
 *   EffectiveDomain[:2] == {5} * ([15, 40] INTERSECTION (-INF, 15])
 *                       == {5} * {15} == {end[:2]}
 *   EffectiveDomain[:3] == {5} * {15} * ({10, 20, 30, 40} INTERSECTION (-INF, 30])
 *                       == {5} * {15} * {10, 20, 30} != {} -> ACCEPT
 *
 * V)
 *   start = (3, 25, 10)
 *   end = (5, 15, 30)
 *   query domain = {5} * [15, 40] * {50}
 *   EffectiveDomain[:1] == {5} == {end[:1]}
 *   EffectiveDomain[:2] == {5} * ([15, 40] INTERSECTION (-INF, 15])
 *                       == {5} * {15} == {end[:2]}
 *   EffectiveDomain[:3] == {5} * {15} * ({50} INTERSECTION (-INF, 30])
 *                       == {5} * {15} * {}
 *                       == {} -> PRUNE
 *
 * @param domain The domain inferred from the query. Assumed to be non-empty
 * @return true if segment needs to be considered for query, false if it can be pruned
 */
@Override
public boolean possibleInDomain(Map<String, RangeSet<String>> domain) {
  final int dimensionCount = dimensions.size();

  // A missing start or end is replaced by a tuple of nulls; a null component adds no bound below
  final StringTuple lowerBound = start != null ? start : new StringTuple(new String[dimensionCount]);
  final StringTuple upperBound = end != null ? end : new StringTuple(new String[dimensionCount]);

  // True while the effective domain of every dimension seen so far is exactly {start[:i]}
  boolean pinnedToStart = true;
  // True while the effective domain of every dimension seen so far is exactly {end[:i]}
  boolean pinnedToEnd = true;

  for (int dim = 0; dim < dimensionCount; dim++) {
    // A dimension the query does not constrain is treated as (-INF, INF)
    RangeSet<String> queryRanges = domain.get(dimensions.get(dim));
    if (queryRanges == null) {
      queryRanges = TreeRangeSet.create();
      queryRanges.add(Range.all());
    }

    // The segment bounds this dimension only while the preceding dimensions sit on that boundary
    Range<String> segmentRange = Range.all();
    if (pinnedToStart && lowerBound.get(dim) != null) {
      segmentRange = segmentRange.intersection(Range.atLeast(lowerBound.get(dim)));
    }
    if (pinnedToEnd && upperBound.get(dim) != null) {
      segmentRange = segmentRange.intersection(Range.atMost(upperBound.get(dim)));
    }

    // EffectiveDomain[i] = QueryDomain[i] INTERSECTION SegmentRange[i]
    final RangeSet<String> effectiveRanges = queryRanges.subRangeSet(segmentRange);
    if (effectiveRanges.isEmpty()) {
      // The query domain lies entirely outside the segment's range
      return false;
    }

    // Remain on a boundary only if the effective domain is exactly that boundary's value
    pinnedToStart = pinnedToStart && isRangeSetSingletonWithVal(effectiveRanges, lowerBound.get(dim));
    pinnedToEnd = pinnedToEnd && isRangeSetSingletonWithVal(effectiveRanges, upperBound.get(dim));

    // Off both boundaries: later dimensions can no longer rule the segment out
    if (!(pinnedToStart || pinnedToEnd)) {
      return true;
    }
  }

  // Every dimension stayed on a boundary without producing an empty effective domain
  return true;
}
use of org.apache.druid.data.input.StringTuple in project druid by druid-io.
the class PartialDimensionDistributionTask method determineDistribution.
/**
 * Builds, for each time interval, a distribution of the partition-dimension tuples seen in the
 * input rows.
 *
 * Null rows are skipped. When the granularity spec declares no input intervals, a row's
 * interval is the segment-granularity bucket of its timestamp; otherwise it is the declared
 * bucket interval containing the timestamp. Each row is turned into a tuple with one entry per
 * partition dimension, where a null or empty dimension yields a null entry. Rows are passed
 * through a dedup filter when rollup is enabled and the data is not assumed to be grouped, and
 * through a pass-through filter otherwise.
 *
 * @param inputRowIterator   source of input rows
 * @param granularitySpec    supplies rollup setting, input intervals and segment granularity
 * @param partitionDimensions dimensions that make up the partition tuple, in order
 * @param isAssumeGrouped    if true, no deduplication is applied even when rollup is enabled
 * @return map from interval to the distribution of partition tuples accepted for that interval
 */
private Map<Interval, StringDistribution> determineDistribution(HandlingInputRowIterator inputRowIterator, GranularitySpec granularitySpec, List<String> partitionDimensions, boolean isAssumeGrouped) {
  final Map<Interval, StringDistribution> distributions = new HashMap<>();

  final InputRowFilter rowFilter;
  if (!isAssumeGrouped && granularitySpec.isRollup()) {
    rowFilter = dedupInputRowFilterSupplier.get();
  } else {
    rowFilter = new PassthroughInputRowFilter();
  }

  while (inputRowIterator.hasNext()) {
    final InputRow row = inputRowIterator.next();
    if (row != null) {
      final Interval rowInterval;
      if (!granularitySpec.inputIntervals().isEmpty()) {
        final Optional<Interval> bucket = granularitySpec.bucketInterval(row.getTimestamp());
        // this interval must exist since it passed the rowFilter
        assert bucket.isPresent();
        rowInterval = bucket.get();
      } else {
        rowInterval = granularitySpec.getSegmentGranularity().bucket(row.getTimestamp());
      }

      // One entry per partition dimension; entries for null or empty dimensions stay null
      final String[] tupleValues = new String[partitionDimensions.size()];
      for (int dim = 0; dim < partitionDimensions.size(); ++dim) {
        final List<String> rowDimValues = row.getDimension(partitionDimensions.get(dim));
        if (rowDimValues == null || rowDimValues.isEmpty()) {
          continue;
        }
        tupleValues[dim] = Iterables.getOnlyElement(rowDimValues);
      }
      final StringTuple partitionTuple = StringTuple.create(tupleValues);

      if (rowFilter.accept(rowInterval, partitionTuple, row)) {
        final StringDistribution distribution = distributions.computeIfAbsent(rowInterval, ignored -> new StringSketch());
        distribution.put(partitionTuple);
      }
    }
  }

  // DedupInputRowFilter may not accept the min/max dimensionValue. If needed, add the min/max
  // values to the distributions so they have an accurate min/max.
  rowFilter.getIntervalToMinPartitionDimensionValue().forEach(
      (bucket, minTuple) -> distributions.get(bucket).putIfNewMin(minTuple)
  );
  rowFilter.getIntervalToMaxPartitionDimensionValue().forEach(
      (bucket, maxTuple) -> distributions.get(bucket).putIfNewMax(maxTuple)
  );

  return distributions;
}
Aggregations