Search in sources :

Example 56 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandler method pushdownPredicate.

/**
 * Attempts to push down a basic Filter predicate into HBase.
 *
 * <p>The constraint summary is walked in its iteration order and the first entry whose ValueSet is a single,
 * non-nullable value is converted into an equality filter. Any other constraints are not pushed down here.
 *
 * @param isNative True if the values are stored in HBase using native byte[] vs being serialized as Strings.
 * @param constraints The constraints that we can attempt to push into HBase as part of the scan.
 * @return A filter if we found a predicate we can push down, null otherwise.
 * @note Currently this method only supports constraints that can be represented by HBase's SingleColumnValueFilter
 * or RowFilter and CompareOp of EQUAL. In the future we can add > and < for certain field types.
 */
private Filter pushdownPredicate(boolean isNative, Constraints constraints) {
    for (Map.Entry<String, ValueSet> next : constraints.getSummary().entrySet()) {
        ValueSet valueSet = next.getValue();
        if (!valueSet.isSingleValue() || valueSet.isNullAllowed()) {
            // Only an exact, non-null match can be expressed as an EQUAL filter.
            continue;
        }

        String columnName = next.getKey();
        byte[] value = HbaseSchemaUtils.toBytes(isNative, valueSet.getSingleValue());
        String[] colParts = HbaseSchemaUtils.extractColumnParts(columnName);
        CompareFilter.CompareOp compareOp = CompareFilter.CompareOp.EQUAL;

        if (columnName.equals(HbaseSchemaUtils.ROW_COLUMN_NAME)) {
            return new RowFilter(compareOp, new BinaryComparator(value));
        }

        // Encode family/qualifier with an explicit charset; the no-arg getBytes() uses the platform default,
        // which can yield different bytes for non-ASCII names depending on where the code runs.
        byte[] family = colParts[0].getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] qualifier = colParts[1].getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return new SingleColumnValueFilter(family, qualifier, compareOp, value);
    }
    return null;
}
Also used : RowFilter(org.apache.hadoop.hbase.filter.RowFilter) SingleColumnValueFilter(org.apache.hadoop.hbase.filter.SingleColumnValueFilter) CompareFilter(org.apache.hadoop.hbase.filter.CompareFilter) Map(java.util.Map) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) BinaryComparator(org.apache.hadoop.hbase.filter.BinaryComparator)

Example 57 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class BigQuerySqlUtilsTest method testSqlWithConstraintsRanges.

/**
 * Builds one constraint per supported predicate shape (nullable range, IS NULL, IS NOT NULL, string range,
 * single boolean value, integer IN-list) and checks both the generated SQL text and the bound parameter values.
 */
@Test
public void testSqlWithConstraintsRanges() throws Exception {
    // Insertion-ordered map; the expected SQL below lists columns and predicates in this same order.
    Map<String, ValueSet> constraintMap = new LinkedHashMap<>();

    // (10, 20] with nulls allowed.
    ValueSet nullableIntRange = SortedRangeSet.newBuilder(INT_TYPE, true)
            .add(new Range(
                    Marker.above(new BlockAllocatorImpl(), INT_TYPE, 10),
                    Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 20)))
            .build();
    // No ranges, nulls allowed.
    ValueSet onlyNull = SortedRangeSet.newBuilder(INT_TYPE, true).build();
    // Fully unbounded range, nulls not allowed.
    ValueSet anyNonNull = SortedRangeSet.newBuilder(INT_TYPE, false)
            .add(new Range(
                    Marker.lowerUnbounded(new BlockAllocatorImpl(), INT_TYPE),
                    Marker.upperUnbounded(new BlockAllocatorImpl(), INT_TYPE)))
            .build();
    // ["a_low", "z_high").
    ValueSet stringRange = SortedRangeSet.newBuilder(STRING_TYPE, false)
            .add(new Range(
                    Marker.exactly(new BlockAllocatorImpl(), STRING_TYPE, "a_low"),
                    Marker.below(new BlockAllocatorImpl(), STRING_TYPE, "z_high")))
            .build();
    // Single value: true.
    ValueSet singleBoolean = SortedRangeSet.newBuilder(BOOLEAN_TYPE, false)
            .add(new Range(
                    Marker.exactly(new BlockAllocatorImpl(), BOOLEAN_TYPE, true),
                    Marker.exactly(new BlockAllocatorImpl(), BOOLEAN_TYPE, true)))
            .build();
    // Two single-value ranges: 10 and 1,000,000.
    ValueSet intInList = SortedRangeSet.newBuilder(INT_TYPE, false)
            .add(new Range(
                    Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 10),
                    Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 10)))
            .add(new Range(
                    Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 1000_000),
                    Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 1000_000)))
            .build();

    constraintMap.put("integerRange", nullableIntRange);
    constraintMap.put("isNullRange", onlyNull);
    constraintMap.put("isNotNullRange", anyNonNull);
    constraintMap.put("stringRange", stringRange);
    constraintMap.put("booleanRange", singleBoolean);
    constraintMap.put("integerInRange", intInList);

    Mockito.when(split.getProperties()).thenReturn(Collections.emptyMap());

    final List<QueryParameterValue> expectedParameterValues = ImmutableList.of(
            QueryParameterValue.int64(10),
            QueryParameterValue.int64(20),
            QueryParameterValue.string("a_low"),
            QueryParameterValue.string("z_high"),
            QueryParameterValue.bool(true),
            QueryParameterValue.int64(10),
            QueryParameterValue.int64(1000000));
    final String expectedSql =
            "SELECT `integerRange`,`isNullRange`,`isNotNullRange`,`stringRange`,`booleanRange`,`integerInRange` from `schema`.`table` "
            + "WHERE ((integerRange IS NULL) OR (`integerRange` > ? AND `integerRange` <= ?)) "
            + "AND (isNullRange IS NULL) AND (isNotNullRange IS NOT NULL) "
            + "AND ((`stringRange` >= ? AND `stringRange` < ?)) "
            + "AND (`booleanRange` = ?) "
            + "AND (`integerInRange` IN (?,?))";

    try (Constraints constraints = new Constraints(constraintMap)) {
        List<QueryParameterValue> actualParameterValues = new ArrayList<>();
        String actualSql = BigQuerySqlUtils.buildSqlFromSplit(
                tableName, makeSchema(constraintMap), constraints, split, actualParameterValues);
        assertEquals(expectedParameterValues, actualParameterValues);
        assertEquals(expectedSql, actualSql);
    }
}
Also used : QueryParameterValue(com.google.cloud.bigquery.QueryParameterValue) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) ArrayList(java.util.ArrayList) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 58 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class HbaseRecordHandlerTest method doReadRecordsSpill.

/**
 * Reads 10,000 mocked HBase results with a ~1.5MB block size and verifies that the response is a
 * RemoteReadRecordsResponse with more than one block, each of which can be read back from the spill location.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> fakeRows = TestUtils.makeResults(10_000);
    ResultScanner scanner = mock(ResultScanner.class);
    when(scanner.iterator()).thenReturn(fakeRows.iterator());

    // Route the handler's scan callback (third argument of scanTable) to the mocked scanner.
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer(invocation -> ((ResultProcessor) invocation.getArguments()[2]).scan(scanner));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "family1:col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.BIGINT.getType(),
                    ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)),
                    true));

    S3SpillLocation splitLoc = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(splitLoc, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    String queryId = "queryId-" + System.currentTimeMillis();
    TableName table = new TableName(DEFAULT_SCHEMA, TEST_TABLE);
    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            queryId,
            table,
            schemaForRead,
            splitBuilder.build(),
            new Constraints(constraintsMap),
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse response = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", response.getRemoteBlocks().size());
        assertTrue(response.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation remoteBlock : response.getRemoteBlocks()) {
            S3SpillLocation spillLocation = (S3SpillLocation) remoteBlock;
            try (Block block = spillReader.read(spillLocation, response.getEncryptionKey(), response.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) RemoteReadRecordsResponse(com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse) SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) HashMap(java.util.HashMap) Matchers.anyString(org.mockito.Matchers.anyString) RecordResponse(com.amazonaws.athena.connector.lambda.records.RecordResponse) Result(org.apache.hadoop.hbase.client.Result) PutObjectResult(com.amazonaws.services.s3.model.PutObjectResult) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) ReadRecordsRequest(com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) S3SpillLocation(com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation) Block(com.amazonaws.athena.connector.lambda.data.Block) Scan(org.apache.hadoop.hbase.client.Scan) ResultProcessor(com.amazonaws.athena.connectors.hbase.connection.ResultProcessor) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) Test(org.junit.Test)

Example 59 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class GetTableLayoutRequestSerDeTest method beforeTest.

/**
 * Prepares the fixture: builds the expected GetTableLayoutRequest (schema, three constraints, and the
 * year/month/day partition columns) and loads the expected serialized text from
 * serde/v2 GetTableLayoutRequest.json.
 */
@Before
public void beforeTest() throws IOException {
    Schema tableSchema = SchemaBuilder.newBuilder()
            .addField("year", new ArrowType.Int(32, true))
            .addField("month", new ArrowType.Int(32, true))
            .addField("day", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", Types.MinorType.FLOAT8.getType())
            .addField("col4", Types.MinorType.FLOAT8.getType())
            .addField("col5", Types.MinorType.FLOAT8.getType())
            .build();

    // One constraint per ValueSet implementation: SortedRangeSet, EquatableValueSet, AllOrNoneValueSet.
    Map<String, ValueSet> summary = new HashMap<>();
    summary.put(
            "col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.FLOAT8.getType(),
                    ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)),
                    false));
    summary.put(
            "col4",
            EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true)
                    .add(1.1D)
                    .build());
    summary.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));

    Constraints tableConstraints = new Constraints(summary);
    expected = new GetTableLayoutRequest(
            federatedIdentity,
            "test-query-id",
            "test-catalog",
            new TableName("test-schema", "test-table"),
            tableConstraints,
            tableSchema,
            ImmutableSet.of("year", "month", "day"));

    String serDeResource = utils.getResourceOrFail("serde/v2", "GetTableLayoutRequest.json");
    expectedSerDeText = utils.readAllAsString(serDeResource).trim();
}
Also used : HashMap(java.util.HashMap) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) AllOrNoneValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.AllOrNoneValueSet) Before(org.junit.Before)

Example 60 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class PostGreSqlRecordHandlerTest method getRangeSet.

/**
 * Creates a deep-stubbed SortedRangeSet mock whose ordered ranges consist of exactly one mocked Range.
 * The mocked Range reports that it is not a single value and exposes the supplied low/high bounds and values.
 *
 * @param lowerBound bound type returned by the range's low marker
 * @param lowerValue value returned by the range's low marker
 * @param upperBound bound type returned by the range's high marker
 * @param upperValue value returned by the range's high marker
 * @return the mocked ValueSet wrapping the single mocked Range
 */
private ValueSet getRangeSet(Marker.Bound lowerBound, Object lowerValue, Marker.Bound upperBound, Object upperValue) {
    // Deep stubs let the chained getLow()/getHigh() calls be stubbed without creating Marker mocks by hand.
    Range stubbedRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedRange.isSingleValue()).thenReturn(false);

    // Low end of the range.
    Mockito.when(stubbedRange.getLow().getBound()).thenReturn(lowerBound);
    Mockito.when(stubbedRange.getLow().getValue()).thenReturn(lowerValue);

    // High end of the range.
    Mockito.when(stubbedRange.getHigh().getBound()).thenReturn(upperBound);
    Mockito.when(stubbedRange.getHigh().getValue()).thenReturn(upperValue);

    ValueSet stubbedSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedSet.getRanges().getOrderedRanges()).thenReturn(Collections.singletonList(stubbedRange));
    return stubbedSet;
}
Also used : Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Aggregations

ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)104 Test (org.junit.Test)66 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)63 HashMap (java.util.HashMap)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)47 Schema (org.apache.arrow.vector.types.pojo.Schema)37 Split (com.amazonaws.athena.connector.lambda.domain.Split)31 Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range)27 ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest)27 EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet)26 ArrayList (java.util.ArrayList)25 Matchers.anyString (org.mockito.Matchers.anyString)25 RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse)24 Block (com.amazonaws.athena.connector.lambda.data.Block)23 S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation)21 RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse)18 SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)17 ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse)17 InvocationOnMock (org.mockito.invocation.InvocationOnMock)17 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)13