use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class HiveMetadataHandler method getPartitions.
/**
* Used to get the Hive partitions that must be read from the requested table in order to satisfy the requested predicate.
*
* @param blockWriter Used to write rows (hive partitions) into the Apache Arrow response.
* @param getTableLayoutRequest Provides details of the catalog, database, and table being queried as well as any filter predicate.
* @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
* @throws SQLException A SQLException should be thrown for database connection failures, query syntax errors, and so on.
*/
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws SQLException {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
            Statement stmt = connection.createStatement();
            PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + getTableLayoutRequest.getTableName().getTableName().toUpperCase())) {
        boolean isTablePartitioned = false;
        ResultSet partitionResultset = stmt.executeQuery("show table extended like " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
        while (partitionResultset != null && partitionResultset.next()) {
            String partExists = partitionResultset.getString(1);
            if (partExists.toUpperCase().contains("PARTITIONED")) {
                String partValue = partExists.split(":")[1];
                if (partValue.toUpperCase().contains("TRUE")) {
                    isTablePartitioned = true;
                }
            }
        }
        LOGGER.debug("isTablePartitioned:" + isTablePartitioned);
        if (isTablePartitioned) {
            ResultSet partitionRs = stmt.executeQuery("show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
            Set<String> partition = new HashSet<>();
            while (partitionRs != null && partitionRs.next()) {
                partition.add(partitionRs.getString("Partition"));
            }
            if (!partition.isEmpty()) {
                Map<String, String> columnHashMap = getMetadataForGivenTable(psmt);
                addPartitions(partition, columnHashMap, blockWriter);
            }
        } else {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, HiveConstants.ALL_PARTITIONS);
                return 1;
            });
        }
    }
}
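The helpers addPartitions and getMetadataForGivenTable are not part of this excerpt. As a hypothetical sketch of what the partition-writing side might look like (the signature is taken from the call above; the body is an assumption, not the connector's actual code), each partition string returned by "show partitions" would be written as one row of the response Block:

private void addPartitions(Set<String> partitions, Map<String, String> columnHashMap, BlockWriter blockWriter) {
    // columnHashMap (column name -> Hive type from getMetadataForGivenTable) would presumably be used
    // to format or type the partition values; it is unused in this simplified sketch.
    for (String partitionValue : partitions) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            // Store the raw partition spec (e.g. "year=2021/month=01") in the partition column,
            // mirroring how the unpartitioned branch above stores HiveConstants.ALL_PARTITIONS.
            block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionValue);
            // Returning 1 tells the BlockWriter that one row was written.
            return 1;
        });
    }
}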
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class MetricsMetadataHandlerTest method doGetMetricSamplesSplits.
@Test
public void doGetMetricSamplesSplits() throws Exception {
    logger.info("doGetMetricSamplesSplits: enter");
    String namespaceFilter = "MyNameSpace";
    String statistic = "p90";
    int numMetrics = 10;
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
        // assert that the namespace filter was indeed pushed down
        assertEquals(namespaceFilter, request.getNamespace());
        String nextToken = (request.getNextToken() == null) ? "valid" : null;
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespaceFilter).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
    });
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(namespaceFilter).build());
    constraintsMap.put(STATISTIC_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(statistic).build());
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName(defaultSchema, "metric_samples"), partitions, Collections.singletonList("partitionId"), new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetMetricSamplesSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetMetricSamplesSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        assertEquals(3, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(SERIALIZED_METRIC_STATS_FIELD_NAME));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertEquals(1, numContinuations);
    logger.info("doGetMetricSamplesSplits: exit");
}
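The three-line construction of the single-row partitions Block above can also be written with the SDK's BlockUtils.newBlock convenience method. A minimal sketch, assuming the same allocator (note it is not byte-for-byte identical: the original test writes its value at vector index 1, while newBlock writes row 0):

// Sketch: build a one-row partitions Block with a single "partitionId" column holding the value 1.
Block partitions = BlockUtils.newBlock(allocator, "partitionId", Types.MinorType.INT.getType(), 1);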
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class MetricsMetadataHandlerTest method doGetMetricSamplesSplitsEmptyMetrics.
@Test
public void doGetMetricSamplesSplitsEmptyMetrics() throws Exception {
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: enter");
    String namespace = "NameSpace";
    String invalidNamespaceFilter = "InvalidNameSpace";
    int numMetrics = 10;
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespace).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(null).withMetrics(metrics);
    });
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(invalidNamespaceFilter).build());
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName(defaultSchema, "metric_samples"), partitions, Collections.singletonList("partitionId"), new Constraints(constraintsMap), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, null);
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    assertEquals(0, response.getSplits().size());
    assertEquals(null, response.getContinuationToken());
}
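Why the mismatched namespace yields zero splits: the handler filters the metrics returned by CloudWatch against the query constraints, and an EquatableValueSet only accepts the values that were added to it, so "NameSpace" fails the "InvalidNameSpace" filter. A hedged sketch of that check using the SDK's ConstraintEvaluator (not the handler's actual code; it reuses the test's allocator and constraintsMap and assumes the namespace field is a VARCHAR column):

// Sketch: evaluate a candidate namespace value against the same constraints map built above.
Schema metricsSchema = SchemaBuilder.newBuilder().addStringField(NAMESPACE_FIELD).build();
ConstraintEvaluator evaluator = new ConstraintEvaluator(allocator, metricsSchema, new Constraints(constraintsMap));
boolean keep = evaluator.apply(NAMESPACE_FIELD, "NameSpace"); // false -> metric dropped, no split emitted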
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class CloudwatchRecordHandler method readWithConstraint.
/**
* Scans CloudWatch Logs using the LogStream and optional timestamp filters.
*
* @see RecordHandler
*/
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws TimeoutException {
    String continuationToken = null;
    TableName tableName = recordsRequest.getTableName();
    Split split = recordsRequest.getSplit();
    invoker.setBlockSpiller(spiller);
    do {
        final String actualContinuationToken = continuationToken;
        GetLogEventsResult logEventsResult = invoker.invoke(() -> awsLogs.getLogEvents(pushDownConstraints(recordsRequest.getConstraints(), new GetLogEventsRequest().withLogGroupName(split.getProperty(LOG_GROUP_FIELD)).withLogStreamName(split.getProperty(LOG_STREAM_FIELD)).withNextToken(actualContinuationToken).withStartFromHead(true))));
        if (continuationToken == null || !continuationToken.equals(logEventsResult.getNextForwardToken())) {
            continuationToken = logEventsResult.getNextForwardToken();
        } else {
            continuationToken = null;
        }
        for (OutputLogEvent ole : logEventsResult.getEvents()) {
            spiller.writeRows((Block block, int rowNum) -> {
                boolean matched = true;
                matched &= block.offerValue(LOG_STREAM_FIELD, rowNum, split.getProperty(LOG_STREAM_FIELD));
                matched &= block.offerValue(LOG_TIME_FIELD, rowNum, ole.getTimestamp());
                matched &= block.offerValue(LOG_MSG_FIELD, rowNum, ole.getMessage());
                return matched ? 1 : 0;
            });
        }
        logger.info("readWithConstraint: LogGroup[{}] LogStream[{}] Continuation[{}] rows[{}]", tableName.getSchemaName(), tableName.getTableName(), continuationToken, logEventsResult.getEvents().size());
    } while (continuationToken != null && queryStatusChecker.isQueryRunning());
}
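The pushDownConstraints helper is not shown in this excerpt. A hedged sketch of the idea (the body is an assumption, not the connector's actual implementation): if the query constrains the time column with a sorted range, the low and high bounds can be copied onto the GetLogEventsRequest so CloudWatch Logs filters server-side instead of returning every event.

private GetLogEventsRequest pushDownConstraints(Constraints constraints, GetLogEventsRequest request) {
    // Assumes the time column carries a SortedRangeSet of epoch-millisecond bounds.
    ValueSet timeConstraint = constraints.getSummary().get(LOG_TIME_FIELD);
    if (timeConstraint instanceof SortedRangeSet && !timeConstraint.isNone()) {
        Range span = ((SortedRangeSet) timeConstraint).getSpan();
        if (!span.getLow().isLowerUnbounded()) {
            request.setStartTime((Long) span.getLow().getValue());
        }
        if (!span.getHigh().isUpperUnbounded()) {
            request.setEndTime((Long) span.getHigh().getValue());
        }
    }
    return request;
}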
use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.
the class QueryUtils method makePredicate.
/**
* Converts a single field constraint into a Document for use in a DocumentDB query.
*
* @param field The field for the given ValueSet constraint.
* @param constraint The constraint to apply to the given field.
* @return A Document describing the constraint for pushing down into DocumentDB.
*/
public static Document makePredicate(Field field, ValueSet constraint) {
    String name = field.getName();
    if (constraint.isNone()) {
        return documentOf(name, isNullPredicate());
    }
    if (constraint.isAll()) {
        return documentOf(name, isNotNullPredicate());
    }
    if (constraint.isNullAllowed()) {
        // TODO: support nulls mixed with discrete value constraints
        return null;
    }
    if (constraint instanceof EquatableValueSet) {
        Block block = ((EquatableValueSet) constraint).getValues();
        List<Object> singleValues = new ArrayList<>();
        FieldReader fieldReader = block.getFieldReaders().get(0);
        for (int i = 0; i < block.getRowCount(); i++) {
            Document nextEqVal = new Document();
            fieldReader.setPosition(i);
            Object value = fieldReader.readObject();
            nextEqVal.put(EQ_OP, convert(value));
            singleValues.add(nextEqVal);
        }
        return orPredicate(singleValues.stream().map(next -> new Document(name, next)).collect(toList()));
    }
    List<Object> singleValues = new ArrayList<>();
    List<Document> disjuncts = new ArrayList<>();
    for (Range range : constraint.getRanges().getOrderedRanges()) {
        if (range.isSingleValue()) {
            singleValues.add(convert(range.getSingleValue()));
        } else {
            Document rangeConjuncts = new Document();
            if (!range.getLow().isLowerUnbounded()) {
                switch (range.getLow().getBound()) {
                    case ABOVE:
                        rangeConjuncts.put(GT_OP, convert(range.getLow().getValue()));
                        break;
                    case EXACTLY:
                        rangeConjuncts.put(GTE_OP, convert(range.getLow().getValue()));
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low Marker should never use BELOW bound: " + range);
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                }
            }
            if (!range.getHigh().isUpperUnbounded()) {
                switch (range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High Marker should never use ABOVE bound: " + range);
                    case EXACTLY:
                        rangeConjuncts.put(LTE_OP, convert(range.getHigh().getValue()));
                        break;
                    case BELOW:
                        rangeConjuncts.put(LT_OP, convert(range.getHigh().getValue()));
                        break;
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                }
            }
            // If rangeConjuncts is empty, then the range was ALL, which should already have been checked for
            verify(!rangeConjuncts.isEmpty());
            disjuncts.add(rangeConjuncts);
        }
    }
    // Add back all of the possible single values either as an equality or an IN predicate
    if (singleValues.size() == 1) {
        disjuncts.add(documentOf(EQ_OP, singleValues.get(0)));
    } else if (singleValues.size() > 1) {
        disjuncts.add(documentOf(IN_OP, singleValues));
    }
    return orPredicate(disjuncts.stream().map(disjunct -> new Document(name, disjunct)).collect(toList()));
}
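The documentOf and orPredicate helpers referenced above are not part of this excerpt; a plausible sketch of what they might look like (assumed implementations, using only the org.bson.Document API):

// Sketch of the small helpers used above.
private static Document documentOf(String key, Object value) {
    return new Document(key, value);
}

private static Document orPredicate(List<Document> values) {
    // A single disjunct needs no $or wrapper; multiple disjuncts are combined with $or.
    if (values.size() == 1) {
        return values.get(0);
    }
    return new Document("$or", values);
}

Assuming the *_OP constants map to the standard MongoDB operators ($gt, $gte, $lt, $lte, $eq, $in), a constraint like 10 < col <= 20 would therefore come out roughly as the Document {"col": {"$gt": 10, "$lte": 20}}.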