Search in sources :

Example 26 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class SqlServerRecordHandlerTest method getSingleValueSet.

/**
 * Creates a mocked {@link SortedRangeSet} whose ordered ranges contain exactly one
 * single-value range.
 *
 * @param value the value returned by the mocked range's low marker
 * @return a deep-stubbed value-set mock exposing that single range
 */
private ValueSet getSingleValueSet(Object value) {
    ValueSet mockedSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Range onlyRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    // The range reports itself as a single value and hands back the caller's value as its low bound.
    Mockito.when(onlyRange.getLow().getValue()).thenReturn(value);
    Mockito.when(onlyRange.isSingleValue()).thenReturn(true);
    Mockito.when(mockedSet.getRanges().getOrderedRanges()).thenReturn(Collections.singletonList(onlyRange));
    return mockedSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 27 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class PredicateBuilder method toPredicate.

/**
 * Translates the constraint described by {@code valueSet} into a parenthesized predicate
 * on {@code columnName}.
 *
 * <p>Only {@link SortedRangeSet} inputs contribute terms; any other value set yields an empty
 * pair of parentheses. Null handling and the fully unbounded span are resolved first. After that,
 * each ordered range becomes either an AND-ed group of bound comparisons or, for single-value
 * ranges, part of one equality / {@code IN} term. All terms are joined with {@code OR}.
 *
 * <p>NOTE(review): in the multi-value {@code IN} branch every value is stored in
 * {@code accumulator} under the same {@code columnName} key, so each put replaces the previous
 * entry - confirm this is what the consumer of the accumulator expects.
 *
 * @param columnName  column the predicate applies to
 * @param valueSet    constraint to translate
 * @param type        type forwarded with every value to the per-comparison overload and the accumulator
 * @param accumulator forwarded to the per-comparison overload; also receives one entry per value of a multi-value {@code IN} term
 * @return the predicate text wrapped in parentheses
 * @throws IllegalArgumentException if a low marker uses the BELOW bound or a high marker uses the ABOVE bound
 */
private static String toPredicate(String columnName, ValueSet valueSet, ArrowType type, HashMap<String, PredicateBuilder.TypeAndValue> accumulator) {
    List<String> orTerms = new ArrayList<>();
    if (!(valueSet instanceof SortedRangeSet)) {
        // Other value-set kinds are not translated here, so no terms are produced.
        return "(" + Joiner.on(" OR ").join(orTerms) + ")";
    }

    if (valueSet.isNone() && valueSet.isNullAllowed()) {
        return "(" + columnName + " IS NULL)";
    }
    final boolean nullAllowed = valueSet.isNullAllowed();
    if (nullAllowed) {
        orTerms.add("(" + columnName + " IS NULL)");
    }

    Range span = ((SortedRangeSet) valueSet).getSpan();
    if (!nullAllowed && span.getLow().isLowerUnbounded() && span.getHigh().isUpperUnbounded()) {
        // Every non-null value is accepted.
        return "(" + columnName + " IS NOT NULL)";
    }

    List<Object> discreteValues = new ArrayList<>();
    for (Range current : valueSet.getRanges().getOrderedRanges()) {
        if (current.isSingleValue()) {
            // Collected now, emitted later as one "=" or IN term.
            discreteValues.add(current.getLow().getValue());
            continue;
        }

        List<String> boundChecks = new ArrayList<>();
        if (!current.getLow().isLowerUnbounded()) {
            switch (current.getLow().getBound()) {
                case EXACTLY:
                    boundChecks.add(toPredicate(columnName, ">=", current.getLow().getValue(), type, accumulator));
                    break;
                case ABOVE:
                    boundChecks.add(toPredicate(columnName, ">", current.getLow().getValue(), type, accumulator));
                    break;
                case BELOW:
                    throw new IllegalArgumentException("Low marker should never use BELOW bound");
                default:
                    throw new AssertionError("Unhandled bound: " + current.getLow().getBound());
            }
        }
        if (!current.getHigh().isUpperUnbounded()) {
            switch (current.getHigh().getBound()) {
                case EXACTLY:
                    boundChecks.add(toPredicate(columnName, "<=", current.getHigh().getValue(), type, accumulator));
                    break;
                case BELOW:
                    boundChecks.add(toPredicate(columnName, "<", current.getHigh().getValue(), type, accumulator));
                    break;
                case ABOVE:
                    throw new IllegalArgumentException("High marker should never use ABOVE bound");
                default:
                    throw new AssertionError("Unhandled bound: " + current.getHigh().getBound());
            }
        }
        // An empty list would mean the range was ALL, which should already have been handled above.
        Preconditions.checkState(!boundChecks.isEmpty());
        orTerms.add("(" + Joiner.on(" AND ").join(boundChecks) + ")");
    }

    // Re-introduce the collected single values as either an equality or an IN term.
    final int discreteCount = discreteValues.size();
    if (discreteCount == 1) {
        orTerms.add(toPredicate(columnName, "=", discreteValues.get(0), type, accumulator));
    } else if (discreteCount > 1) {
        for (Object single : discreteValues) {
            accumulator.put(columnName, new PredicateBuilder.TypeAndValue(type, single));
        }
        // One "<columnName>" placeholder per value.
        String placeholders = Joiner.on(",").join(Collections.nCopies(discreteCount, "<" + columnName + ">"));
        orTerms.add(quote(columnName) + " IN (" + placeholders + ")");
    }
    return "(" + Joiner.on(" OR ").join(orTerms) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 28 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class PredicateBuilder method toPredicate.

/**
 * Translates the constraint described by {@code valueSet} into a parenthesized predicate
 * on {@code columnName}, with values rendered inline through {@code quoteValue}.
 *
 * <p>For a {@link SortedRangeSet}, null handling and the fully unbounded span are resolved first;
 * each ordered range then becomes either an AND-ed group of bound comparisons or, for
 * single-value ranges, part of one equality / {@code IN} term. For an {@link EquatableValueSet},
 * every row of its value block is listed in a single {@code IN} term. All terms are joined with
 * {@code OR}; any other value-set kind yields an empty pair of parentheses.
 *
 * <p>NOTE(review): the {@link EquatableValueSet} branch always emits {@code IN} and does not look
 * at any other state of the set (for example null-allowed or allow/deny-list flags, if the type
 * has them) - confirm this is intended.
 *
 * @param columnName column the predicate applies to
 * @param valueSet   constraint to translate; its {@code getType()} is forwarded with every value
 * @return the predicate text wrapped in parentheses
 * @throws IllegalArgumentException if a low marker uses the BELOW bound or a high marker uses the ABOVE bound
 */
private static String toPredicate(String columnName, ValueSet valueSet) {
    List<String> orTerms = new ArrayList<>();
    if (valueSet instanceof SortedRangeSet) {
        if (valueSet.isNone() && valueSet.isNullAllowed()) {
            return "(" + columnName + " IS NULL)";
        }
        final boolean nullAllowed = valueSet.isNullAllowed();
        if (nullAllowed) {
            orTerms.add("(" + columnName + " IS NULL)");
        }

        Range span = ((SortedRangeSet) valueSet).getSpan();
        if (!nullAllowed && span.getLow().isLowerUnbounded() && span.getHigh().isUpperUnbounded()) {
            // Every non-null value is accepted.
            return "(" + columnName + " IS NOT NULL)";
        }

        List<Object> discreteValues = new ArrayList<>();
        for (Range current : valueSet.getRanges().getOrderedRanges()) {
            if (current.isSingleValue()) {
                // Collected now, emitted later as one "=" or IN term.
                discreteValues.add(current.getLow().getValue());
                continue;
            }

            List<String> boundChecks = new ArrayList<>();
            if (!current.getLow().isLowerUnbounded()) {
                switch (current.getLow().getBound()) {
                    case EXACTLY:
                        boundChecks.add(toPredicate(columnName, ">=", current.getLow().getValue(), valueSet.getType()));
                        break;
                    case ABOVE:
                        boundChecks.add(toPredicate(columnName, ">", current.getLow().getValue(), valueSet.getType()));
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low marker should never use BELOW bound");
                    default:
                        throw new AssertionError("Unhandled bound: " + current.getLow().getBound());
                }
            }
            if (!current.getHigh().isUpperUnbounded()) {
                switch (current.getHigh().getBound()) {
                    case EXACTLY:
                        boundChecks.add(toPredicate(columnName, "<=", current.getHigh().getValue(), valueSet.getType()));
                        break;
                    case BELOW:
                        boundChecks.add(toPredicate(columnName, "<", current.getHigh().getValue(), valueSet.getType()));
                        break;
                    case ABOVE:
                        throw new IllegalArgumentException("High marker should never use ABOVE bound");
                    default:
                        throw new AssertionError("Unhandled bound: " + current.getHigh().getBound());
                }
            }
            // An empty list would mean the range was ALL, which should already have been handled above.
            Preconditions.checkState(!boundChecks.isEmpty());
            orTerms.add("(" + Joiner.on(" AND ").join(boundChecks) + ")");
        }

        // Re-introduce the collected single values as either an equality or an IN term.
        final int discreteCount = discreteValues.size();
        if (discreteCount == 1) {
            orTerms.add(toPredicate(columnName, "=", discreteValues.get(0), valueSet.getType()));
        } else if (discreteCount > 1) {
            List<String> rendered = new ArrayList<>();
            for (Object single : discreteValues) {
                rendered.add(quoteValue(single, valueSet.getType()));
            }
            orTerms.add(quoteColumn(columnName) + " IN (" + Joiner.on(",").join(rendered) + ")");
        }
    } else if (valueSet instanceof EquatableValueSet) {
        EquatableValueSet equatable = (EquatableValueSet) valueSet;
        List<String> rendered = new ArrayList<>();
        // Walk every row of the value block and render it as an IN-list element.
        int row = 0;
        while (row < equatable.getValueBlock().getRowCount()) {
            rendered.add(quoteValue(equatable.getValue(row), valueSet.getType()));
            row++;
        }
        orTerms.add(quoteColumn(columnName) + " IN (" + Joiner.on(",").join(rendered) + ")");
    }
    return "(" + Joiner.on(" OR ").join(orTerms) + ")";
}
Also used : SortedRangeSet(com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range)

Example 29 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class HiveRecordHandlerTest method buildSplitSql.

/**
 * Checks that {@code hiveRecordHandler.buildSplitSql} returns the {@link PreparedStatement}
 * produced by the (mocked) connection when {@code testCol2} is constrained to a single
 * DATEDAY value.
 *
 * <p>NOTE(review): {@code valueSet1} (two single-value ranges, 1 and 2) is built but never added
 * to the constraint summary, so {@code testCol1} is unconstrained in this test - confirm whether
 * it was meant to be included.
 *
 * @throws SQLException declared because the JDBC calls being stubbed and invoked declare it
 */
@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Collections.singletonMap("partition", "p0"));
    Mockito.when(split.getProperty(Mockito.eq("partition"))).thenReturn("p0");
    Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1a.isSingleValue()).thenReturn(true);
    Mockito.when(range1a.getLow().getValue()).thenReturn(1);
    Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1b.isSingleValue()).thenReturn(true);
    Mockito.when(range1b.getLow().getValue()).thenReturn(2);
    ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
    // getTime() is in milliseconds, so the conversion must start from MILLISECONDS;
    // DAYS.toDays(x) returns x unchanged and would leave epoch millis in a "days" variable.
    final long dateDays = TimeUnit.MILLISECONDS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet2 = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol2", valueSet2).build());
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    // Any SQL text is accepted; the test only checks which statement object comes back.
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.hiveRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 30 with Range

use of com.amazonaws.athena.connector.lambda.domain.predicate.Range in project aws-athena-query-federation by awslabs.

the class ImpalaRecordHandlerTest method buildSplitSql.

/**
 * Checks that {@code impalaRecordHandler.buildSplitSql} returns the {@link PreparedStatement}
 * produced by the (mocked) connection when {@code testCol2} is constrained to a single
 * DATEDAY value.
 *
 * <p>NOTE(review): {@code valueSet1} (two single-value ranges, 1 and 2) is built but never added
 * to the constraint summary, so {@code testCol1} is unconstrained in this test - confirm whether
 * it was meant to be included.
 *
 * @throws SQLException declared because the JDBC calls being stubbed and invoked declare it
 */
@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Collections.singletonMap("partition", "p0"));
    Mockito.when(split.getProperty(Mockito.eq("partition"))).thenReturn("p0");
    Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1a.isSingleValue()).thenReturn(true);
    Mockito.when(range1a.getLow().getValue()).thenReturn(1);
    Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1b.isSingleValue()).thenReturn(true);
    Mockito.when(range1b.getLow().getValue()).thenReturn(2);
    ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
    // getTime() is in milliseconds, so the conversion must start from MILLISECONDS;
    // DAYS.toDays(x) returns x unchanged and would leave epoch millis in a "days" variable.
    final long dateDays = TimeUnit.MILLISECONDS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet2 = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol2", valueSet2).build());
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    // Any SQL text is accepted; the test only checks which statement object comes back.
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.impalaRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Aggregations

Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range): 34, ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 26, ArrayList (java.util.ArrayList): 11, SortedRangeSet (com.amazonaws.athena.connector.lambda.domain.predicate.SortedRangeSet): 10, Split (com.amazonaws.athena.connector.lambda.domain.Split): 8, Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 8, Test (org.junit.Test): 8, SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder): 7, TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 7, PreparedStatement (java.sql.PreparedStatement): 7, Schema (org.apache.arrow.vector.types.pojo.Schema): 7, EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet): 5, ImmutableMap (com.google.common.collect.ImmutableMap): 5, FieldBuilder (com.amazonaws.athena.connector.lambda.data.FieldBuilder): 2, JdbcSplitQueryBuilder (com.amazonaws.athena.connectors.jdbc.manager.JdbcSplitQueryBuilder): 2, List (java.util.List): 2, Set (java.util.Set): 2, ArrowType (org.apache.arrow.vector.types.pojo.ArrowType): 2, Block (com.amazonaws.athena.connector.lambda.data.Block): 1, BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl): 1