Use of com.amazonaws.athena.connector.lambda.domain.Split in project aws-athena-query-federation by awslabs.
From the class ExampleMetadataHandlerTest, the method doGetSplits:
/**
 * The goal of this test is to exercise the happy case for getting splits and, in particular, the
 * continuation token logic.
 */
@Test
public void doGetSplits() {
    logger.info("doGetSplits: enter");
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    // This is the schema that ExampleMetadataHandler has laid out for a 'Partition', so we need to populate this
    // minimal set of info here.
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(16, false))
            .addField(monthCol, new ArrowType.Int(16, false))
            .addField(dayCol, new ArrowType.Int(16, false))
            .addField(ExampleMetadataHandler.PARTITION_LOCATION, new ArrowType.Utf8())
            .addField(ExampleMetadataHandler.SERDE, new ArrowType.Utf8())
            .build();
    List<String> partitionCols = new ArrayList<>();
    partitionCols.add(yearCol);
    partitionCols.add(monthCol);
    partitionCols.add(dayCol);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(dayCol, SortedRangeSet.copyOf(Types.MinorType.INT.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.INT.getType(), 20)), false));
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 100;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
        BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.PARTITION_LOCATION), i, String.valueOf(i));
        BlockUtils.setValue(partitions.getFieldVector(ExampleMetadataHandler.SERDE), i, "TextInputType");
    }
    partitions.setRowCount(num_partitions);
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(IdentityUtil.fakeIdentity(), "queryId", "catalog_name",
            new TableName("schema", "table_name"), partitions, partitionCols, new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    // Keep requesting splits until the handler stops returning a continuation token,
    // toggling encryption on alternating requests.
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        ObjectMapperUtil.assertSerialization(req);
        logger.info("doGetSplits: req[{}]", req);
        metadataHandler.setEncryption(numContinuations % 2 == 0);
        logger.info("doGetSplits: Toggle encryption " + (numContinuations % 2 == 0));
        MetadataResponse rawResponse = metadataHandler.doGetSplits(allocator, req);
        ObjectMapperUtil.assertSerialization(rawResponse);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetSplits: continuationToken[{}] - numSplits[{}] - maxSplits[{}]",
                new Object[] { continuationToken, response.getSplits().size(), MAX_SPLITS_PER_REQUEST });
        for (Split nextSplit : response.getSplits()) {
            if (numContinuations % 2 == 0) {
                assertNotNull(nextSplit.getEncryptionKey());
            } else {
                assertNull(nextSplit.getEncryptionKey());
            }
            assertNotNull(nextSplit.getProperty(SplitProperties.LOCATION.getId()));
            assertNotNull(nextSplit.getProperty(SplitProperties.SERDE.getId()));
            assertNotNull(nextSplit.getProperty(SplitProperties.SPLIT_PART.getId()));
        }
        assertTrue("Continuation criteria violated",
                (response.getSplits().size() == MAX_SPLITS_PER_REQUEST && response.getContinuationToken() != null)
                        || response.getSplits().size() < MAX_SPLITS_PER_REQUEST);
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertTrue(numContinuations > 0);
    logger.info("doGetSplits: exit");
}
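The properties asserted above (LOCATION, SERDE, SPLIT_PART) are attached by the metadata handler when it builds each Split. As a rough sketch of how such a split can be assembled with the builder API shown elsewhere on this page (the bucket, prefix, and key material below are placeholders, not ExampleMetadataHandler's real values):

// Illustrative sketch only -- not the actual ExampleMetadataHandler implementation.
// "i" stands for the partition row being turned into a split; names and key bytes are placeholders.
SpillLocation spillLocation = S3SpillLocation.newBuilder().withBucket("example-spill-bucket").withPrefix("lambda-spill")
        .withQueryId("queryId").withSplitId(String.valueOf(i)).withIsDirectory(true).build();
EncryptionKey encryptionKey = new EncryptionKey("example-key".getBytes(), "example-nonce".getBytes());
Split split = Split.newBuilder(spillLocation, encryptionKey)
        .add(SplitProperties.LOCATION.getId(), String.valueOf(i))
        .add(SplitProperties.SERDE.getId(), "TextInputType")
        .add(SplitProperties.SPLIT_PART.getId(), String.valueOf(i))
        .build();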
Use of com.amazonaws.athena.connector.lambda.domain.Split in project aws-athena-query-federation by awslabs.
From the class GetSplitsResponseSerDeTest, the method beforeTest:
@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    SpillLocation spillLocation1 = S3SpillLocation.newBuilder().withBucket("athena-virtuoso-test").withPrefix("lambda-spill")
            .withQueryId("test-query-id").withSplitId("test-split-id-1").withIsDirectory(true).build();
    EncryptionKey encryptionKey1 = new EncryptionKey("test-key-1".getBytes(), "test-nonce-1".getBytes());
    Split split1 = Split.newBuilder(spillLocation1, encryptionKey1)
            .add(yearCol, "2017").add(monthCol, "11").add(dayCol, "1").build();
    SpillLocation spillLocation2 = S3SpillLocation.newBuilder().withBucket("athena-virtuoso-test").withPrefix("lambda-spill")
            .withQueryId("test-query-id").withSplitId("test-split-id-2").withIsDirectory(true).build();
    EncryptionKey encryptionKey2 = new EncryptionKey("test-key-2".getBytes(), "test-nonce-2".getBytes());
    Split split2 = Split.newBuilder(spillLocation2, encryptionKey2)
            .add(yearCol, "2017").add(monthCol, "11").add(dayCol, "2").build();
    expected = new GetSplitsResponse("test-catalog", ImmutableSet.of(split1, split2), "test-continuation-token");
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "GetSplitsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Use of com.amazonaws.athena.connector.lambda.domain.Split in project aws-athena-query-federation by awslabs.
From the class ReadRecordsRequestSerDeTest, the method beforeTest:
@Before
public void beforeTest() throws IOException {
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";
    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Utf8())
            .addField("col3", Types.MinorType.FLOAT8.getType())
            .addField("col4", Types.MinorType.FLOAT8.getType())
            .addField("col5", Types.MinorType.FLOAT8.getType())
            .build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("col3", SortedRangeSet.copyOf(Types.MinorType.FLOAT8.getType(),
            ImmutableList.of(Range.greaterThan(allocator, Types.MinorType.FLOAT8.getType(), -10000D)), false));
    constraintsMap.put("col4", EquatableValueSet.newBuilder(allocator, Types.MinorType.FLOAT8.getType(), false, true).add(1.1D).build());
    constraintsMap.put("col5", new AllOrNoneValueSet(Types.MinorType.FLOAT8.getType(), false, true));
    Constraints constraints = new Constraints(constraintsMap);
    Block partitions = allocator.createBlock(schema);
    int num_partitions = 10;
    for (int i = 0; i < num_partitions; i++) {
        BlockUtils.setValue(partitions.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(partitions.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(partitions.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    partitions.setRowCount(num_partitions);
    SpillLocation spillLocation = S3SpillLocation.newBuilder().withBucket("athena-virtuoso-test").withPrefix("lambda-spill")
            .withQueryId("test-query-id").withSplitId("test-split-id").withIsDirectory(true).build();
    EncryptionKey encryptionKey = new EncryptionKey("test-key".getBytes(), "test-nonce".getBytes());
    Split split = Split.newBuilder(spillLocation, encryptionKey)
            .add("year", "2017").add("month", "11").add("day", "1").build();
    expected = new ReadRecordsRequest(federatedIdentity, "test-query-id", "test-catalog",
            new TableName("test-schema", "test-table"), schema, split, constraints, 100_000_000_000L, 100_000_000_000L);
    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
Use of com.amazonaws.athena.connector.lambda.domain.Split in project aws-athena-query-federation by awslabs.
From the class HbaseRecordHandler, the method readWithConstraint:
/**
 * Scans HBase using the scan settings set on the requested Split by HbaseMetadataHandler.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller blockSpiller, ReadRecordsRequest request, QueryStatusChecker queryStatusChecker)
        throws IOException {
    Schema projection = request.getSchema();
    Split split = request.getSplit();
    String conStr = split.getProperty(HBASE_CONN_STR);
    boolean isNative = projection.getCustomMetadata().get(HBASE_NATIVE_STORAGE_FLAG) != null;
    // Set up the scan so that we only read the key range associated with the region represented by our Split.
    Scan scan = new Scan(split.getProperty(START_KEY_FIELD).getBytes(), split.getProperty(END_KEY_FIELD).getBytes());
    // Attempt to push down a partial predicate using HBase Filters.
    scan.setFilter(pushdownPredicate(isNative, request.getConstraints()));
    // Set up the projection so we only pull the columns/families that we need.
    for (Field next : request.getSchema().getFields()) {
        addToProjection(scan, next);
    }
    getOrCreateConn(conStr).scanTable(HbaseSchemaUtils.getQualifiedTable(request.getTableName()), scan,
            (ResultScanner scanner) -> scanFilterProject(scanner, request, blockSpiller, queryStatusChecker));
}
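For this to work, HbaseMetadataHandler must have stored the connection string and the region's key range on the Split when it generated splits. A minimal sketch of that producer side, using only the builder API and property keys shown on this page (the connection string, the region boundaries, and the makeSpillLocation/makeEncryptionKey helpers are assumptions, not the connector's actual code):

// Sketch only -- not the actual HbaseMetadataHandler implementation; values and helper names are assumptions.
Split split = Split.newBuilder(makeSpillLocation(request), makeEncryptionKey())
        .add(HBASE_CONN_STR, "zookeeper-host:2181")
        .add(START_KEY_FIELD, new String(region.getStartKey()))
        .add(END_KEY_FIELD, new String(region.getEndKey()))
        .build();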
Use of com.amazonaws.athena.connector.lambda.domain.Split in project aws-athena-query-federation by awslabs.
From the class HiveRecordHandlerTest, the method buildSplitSql:
@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Collections.singletonMap("partition", "p0"));
    Mockito.when(split.getProperty(Mockito.eq("partition"))).thenReturn("p0");
    Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1a.isSingleValue()).thenReturn(true);
    Mockito.when(range1a.getLow().getValue()).thenReturn(1);
    Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1b.isSingleValue()).thenReturn(true);
    Mockito.when(range1b.getLow().getValue()).thenReturn(2);
    ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
    final long dateDays = TimeUnit.DAYS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet2 = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol2", valueSet2).build());
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.hiveRecordHandler.buildSplitSql(this.connection, "testCatalogName",
            tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
}
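The getSingleValueSet helper referenced above is not shown on this page. A plausible shape for it, mirroring the Mockito deep-stub pattern used for valueSet1 (an assumption; the real test class may define it differently):

// Assumed helper shape, inferred from the mocking pattern above; the real HiveRecordHandlerTest may differ.
private ValueSet getSingleValueSet(Object value) {
    Range range = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range.isSingleValue()).thenReturn(true);
    Mockito.when(range.getLow().getValue()).thenReturn(value);
    ValueSet valueSet = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range));
    return valueSet;
}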