use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class HbaseRecordHandler method pushdownPredicate.
/**
 * Attempts to push down a basic Filter predicate into HBase.
 *
 * <p>The constraint summary is walked in iteration order and the first column that is constrained to exactly
 * one non-null value is turned into a filter; this method returns as soon as such a column is found.
 *
 * <p>Note: currently this method only supports constraints that can be represented by HBase's
 * SingleColumnValueFilter or RowFilter with a CompareOp of EQUAL. In the future we can add &gt; and &lt;
 * for certain field types.
 *
 * @param isNative True if the values are stored in HBase using native byte[] vs being serialized as Strings.
 * @param constraints The constraints that we can attempt to push into HBase as part of the scan.
 * @return A filter if we found a predicate we can push down, null otherwise.
 */
private Filter pushdownPredicate(boolean isNative, Constraints constraints) {
    for (Map.Entry<String, ValueSet> next : constraints.getSummary().entrySet()) {
        ValueSet valueSet = next.getValue();
        // Only an exact, non-null equality can be expressed with the EQUAL compare op used below.
        if (!valueSet.isSingleValue() || valueSet.isNullAllowed()) {
            continue;
        }

        byte[] value = HbaseSchemaUtils.toBytes(isNative, valueSet.getSingleValue());
        String[] colParts = HbaseSchemaUtils.extractColumnParts(next.getKey());
        CompareFilter.CompareOp compareOp = CompareFilter.CompareOp.EQUAL;

        if (next.getKey().equals(HbaseSchemaUtils.ROW_COLUMN_NAME)) {
            // The row key is matched with a RowFilter rather than a family/qualifier lookup.
            return new RowFilter(compareOp, new BinaryComparator(value));
        }

        // Encode family and qualifier explicitly as UTF-8 instead of relying on the JVM's platform-default
        // charset, so the bytes sent to HBase do not depend on the host's locale/file.encoding settings.
        // NOTE(review): assumes column names are stored as UTF-8 in HBase - confirm against the writer.
        byte[] family = colParts[0].getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] qualifier = colParts[1].getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return new SingleColumnValueFilter(family, qualifier, compareOp, value);
    }
    return null;
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class BigQuerySqlUtilsTest method testSqlWithConstraintsRanges.
/**
 * Verifies the SQL text and the bound parameter values that BigQuerySqlUtils.buildSqlFromSplit produces
 * for a mix of range, null-only, not-null, string, boolean and multi-value (IN) constraints.
 */
@Test
public void testSqlWithConstraintsRanges() throws Exception {
    // Insertion order matters: it drives the column order asserted in the expected SQL below.
    Map<String, ValueSet> constraintsByColumn = new LinkedHashMap<>();

    // (10, 20] with nulls allowed
    constraintsByColumn.put("integerRange",
            SortedRangeSet.newBuilder(INT_TYPE, true)
                    .add(new Range(
                            Marker.above(new BlockAllocatorImpl(), INT_TYPE, 10),
                            Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 20)))
                    .build());

    // no ranges, nulls allowed
    constraintsByColumn.put("isNullRange",
            SortedRangeSet.newBuilder(INT_TYPE, true).build());

    // fully unbounded range, nulls not allowed
    constraintsByColumn.put("isNotNullRange",
            SortedRangeSet.newBuilder(INT_TYPE, false)
                    .add(new Range(
                            Marker.lowerUnbounded(new BlockAllocatorImpl(), INT_TYPE),
                            Marker.upperUnbounded(new BlockAllocatorImpl(), INT_TYPE)))
                    .build());

    // ["a_low", "z_high")
    constraintsByColumn.put("stringRange",
            SortedRangeSet.newBuilder(STRING_TYPE, false)
                    .add(new Range(
                            Marker.exactly(new BlockAllocatorImpl(), STRING_TYPE, "a_low"),
                            Marker.below(new BlockAllocatorImpl(), STRING_TYPE, "z_high")))
                    .build());

    // single value: true
    constraintsByColumn.put("booleanRange",
            SortedRangeSet.newBuilder(BOOLEAN_TYPE, false)
                    .add(new Range(
                            Marker.exactly(new BlockAllocatorImpl(), BOOLEAN_TYPE, true),
                            Marker.exactly(new BlockAllocatorImpl(), BOOLEAN_TYPE, true)))
                    .build());

    // two single values: 10 and 1,000,000
    constraintsByColumn.put("integerInRange",
            SortedRangeSet.newBuilder(INT_TYPE, false)
                    .add(new Range(
                            Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 10),
                            Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 10)))
                    .add(new Range(
                            Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 1_000_000),
                            Marker.exactly(new BlockAllocatorImpl(), INT_TYPE, 1_000_000)))
                    .build());

    Mockito.when(split.getProperties()).thenReturn(Collections.emptyMap());

    final List<QueryParameterValue> expectedParameterValues = ImmutableList.of(
            QueryParameterValue.int64(10),
            QueryParameterValue.int64(20),
            QueryParameterValue.string("a_low"),
            QueryParameterValue.string("z_high"),
            QueryParameterValue.bool(true),
            QueryParameterValue.int64(10),
            QueryParameterValue.int64(1_000_000));

    final String expectedSql =
            "SELECT `integerRange`,`isNullRange`,`isNotNullRange`,`stringRange`,`booleanRange`,`integerInRange` from `schema`.`table` "
                    + "WHERE ((integerRange IS NULL) OR (`integerRange` > ? AND `integerRange` <= ?)) "
                    + "AND (isNullRange IS NULL) AND (isNotNullRange IS NOT NULL) "
                    + "AND ((`stringRange` >= ? AND `stringRange` < ?)) "
                    + "AND (`booleanRange` = ?) "
                    + "AND (`integerInRange` IN (?,?))";

    try (Constraints constraints = new Constraints(constraintsByColumn)) {
        // buildSqlFromSplit fills this list with the values bound to the '?' placeholders.
        List<QueryParameterValue> actualParameterValues = new ArrayList<>();
        String actualSql = BigQuerySqlUtils.buildSqlFromSplit(
                tableName, makeSchema(constraintsByColumn), constraints, split, actualParameterValues);

        assertEquals(expectedParameterValues, actualParameterValues);
        assertEquals(expectedSql, actualSql);
    }
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class HbaseRecordHandlerTest method doReadRecordsSpill.
/**
 * Reads 10,000 mocked HBase results with a ~1.5MB max block size and checks that the handler answers
 * with a RemoteReadRecordsResponse made of more than one block, each of which can be read back from spill.
 */
@Test
public void doReadRecordsSpill() throws Exception {
    List<Result> fakeRows = TestUtils.makeResults(10_000);
    ResultScanner scannerMock = mock(ResultScanner.class);
    when(mockScanner(scannerMock)).thenReturn(fakeRows.iterator());

    // Route the scan through the ResultProcessor handed to scanTable (third argument) using the mocked scanner.
    when(mockClient.scanTable(anyObject(), any(Scan.class), anyObject()))
            .thenAnswer((InvocationOnMock invocation) ->
                    ((ResultProcessor) invocation.getArguments()[2]).scan(scannerMock));

    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(
            "family1:col3",
            SortedRangeSet.copyOf(
                    Types.MinorType.BIGINT.getType(),
                    ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.BIGINT.getType(), 0L)),
                    true));

    S3SpillLocation spillTarget = S3SpillLocation.newBuilder()
            .withBucket(UUID.randomUUID().toString())
            .withSplitId(UUID.randomUUID().toString())
            .withQueryId(UUID.randomUUID().toString())
            .withIsDirectory(true)
            .build();

    Split.Builder splitBuilder = Split.newBuilder(spillTarget, keyFactory.create())
            .add(HBASE_CONN_STR, "fake_con_str")
            .add(START_KEY_FIELD, "fake_start_key")
            .add(END_KEY_FIELD, "fake_end_key")
            .add(REGION_ID_FIELD, "fake_region_id")
            .add(REGION_NAME_FIELD, "fake_region_name");

    String queryId = "queryId-" + System.currentTimeMillis();
    TableName tableUnderTest = new TableName(DEFAULT_SCHEMA, TEST_TABLE);
    Split testSplit = splitBuilder.build();
    Constraints readConstraints = new Constraints(constraintsMap);

    ReadRecordsRequest request = new ReadRecordsRequest(
            IDENTITY,
            DEFAULT_CATALOG,
            queryId,
            tableUnderTest,
            schemaForRead,
            testSplit,
            readConstraints,
            // ~1.5MB so we should see some spill
            1_500_000L,
            0L);

    RecordResponse rawResponse = handler.doReadRecords(allocator, request);
    assertTrue(rawResponse instanceof RemoteReadRecordsResponse);

    try (RemoteReadRecordsResponse spilled = (RemoteReadRecordsResponse) rawResponse) {
        logger.info("doReadRecordsSpill: remoteBlocks[{}]", spilled.getRemoteBlocks().size());
        assertTrue(spilled.getNumberBlocks() > 1);

        int blockNum = 0;
        for (SpillLocation location : spilled.getRemoteBlocks()) {
            S3SpillLocation s3Location = (S3SpillLocation) location;
            try (Block block = spillReader.read(s3Location, spilled.getEncryptionKey(), spilled.getSchema())) {
                logger.info("doReadRecordsSpill: blockNum[{}] and recordCount[{}]", blockNum, block.getRowCount());
                blockNum++;
                // Stricter per-block check, currently disabled:
                // assertTrue(++blockNum < response.getRemoteBlocks().size() && block.getRowCount() > 10_000);
                logger.info("doReadRecordsSpill: {}", BlockUtils.rowToString(block, 0));
                assertNotNull(BlockUtils.rowToString(block, 0));
            }
        }
    }
}

/** Small readability helper: the iterator() call on the scanner that the test stubs. */
private static java.util.Iterator<Result> mockScanner(ResultScanner scanner) {
    return scanner.iterator();
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class GetTableLayoutRequestSerDeTest method beforeTest.
/**
 * Builds the GetTableLayoutRequest the test expects and loads the expected serialized form
 * from the serde/v2 resource GetTableLayoutRequest.json.
 *
 * @throws IOException declared by the original signature.
 */
@Before
public void beforeTest() throws IOException {
    ArrowType int32 = new ArrowType.Int(32, true);
    ArrowType float8 = Types.MinorType.FLOAT8.getType();

    Schema tableSchema = SchemaBuilder.newBuilder()
            .addField("year", new ArrowType.Int(32, true))
            .addField("month", int32)
            .addField("day", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", float8)
            .addField("col4", float8)
            .addField("col5", float8)
            .build();

    // One constraint per ValueSet implementation used here: range, equatable, all-or-none.
    Map<String, ValueSet> summary = new HashMap<>();
    summary.put(
            "col3",
            SortedRangeSet.copyOf(float8, ImmutableList.of(Range.greaterThan(allocator, float8, -10000D)), false));
    summary.put(
            "col4",
            EquatableValueSet.newBuilder(allocator, float8, false, true).add(1.1D).build());
    summary.put(
            "col5",
            new AllOrNoneValueSet(float8, false, true));

    expected = new GetTableLayoutRequest(
            federatedIdentity,
            "test-query-id",
            "test-catalog",
            new TableName("test-schema", "test-table"),
            new Constraints(summary),
            tableSchema,
            ImmutableSet.of("year", "month", "day"));

    String resourcePath = utils.getResourceOrFail("serde/v2", "GetTableLayoutRequest.json");
    expectedSerDeText = utils.readAllAsString(resourcePath).trim();
}
use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.
the class PostGreSqlRecordHandlerTest method getRangeSet.
/**
 * Creates a mocked SortedRangeSet whose ordered ranges consist of exactly one mocked, non-single-value
 * Range with the supplied low/high bounds and values.
 *
 * @param lowerBound bound reported by the range's low marker
 * @param lowerValue value reported by the range's low marker
 * @param upperBound bound reported by the range's high marker
 * @param upperValue value reported by the range's high marker
 * @return the mocked value set
 */
private ValueSet getRangeSet(Marker.Bound lowerBound, Object lowerValue, Marker.Bound upperBound, Object upperValue) {
    // Deep stubs let the chained getLow()/getHigh() calls be stubbed without separate Marker mocks.
    final Range stubbedRange = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedRange.isSingleValue()).thenReturn(false);

    // Low end of the range.
    Mockito.when(stubbedRange.getLow().getBound()).thenReturn(lowerBound);
    Mockito.when(stubbedRange.getLow().getValue()).thenReturn(lowerValue);

    // High end of the range.
    Mockito.when(stubbedRange.getHigh().getBound()).thenReturn(upperBound);
    Mockito.when(stubbedRange.getHigh().getValue()).thenReturn(upperValue);

    final ValueSet stubbedSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(stubbedSet.getRanges().getOrderedRanges())
            .thenReturn(Collections.singletonList(stubbedRange));
    return stubbedSet;
}
Aggregations