Search in sources :

Example 1 with LogStream

use of com.amazonaws.services.logs.model.LogStream in project aws-athena-query-federation by awslabs.

the class CloudwatchMetadataHandler method getPartitions.

/**
 * Writes one partition row per CloudWatch LogStream that backs the requested table.
 *
 * <p>The LogGroup is taken from the resolved table name. Unless the table is the special
 * ALL_LOG_STREAMS view, the DescribeLogStreams call is narrowed with the table's log stream
 * name as a prefix. Results are paged through via the next token until either no token is
 * returned or the query is reported as no longer running.
 *
 * @param blockWriter Receives one row (log group, log stream, stored bytes) per LogStream.
 * @param request The table layout request carrying the table name to resolve.
 * @param queryStatusChecker Polled between pages so paging stops once the query is not running.
 * @note Rows are only counted when every setValue call reports a match, which is how partition
 * pruning on the written fields takes effect.
 * @see MetadataHandler
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception {
    final CloudwatchTableName resolvedTable = tableResolver.validateTable(request.getTableName());
    final DescribeLogStreamsRequest cwRequest = new DescribeLogStreamsRequest(resolvedTable.getLogGroupName());

    final boolean scanAllStreams = ALL_LOG_STREAMS_TABLE.equals(resolvedTable.getLogStreamName());
    if (!scanAllStreams) {
        cwRequest.setLogStreamNamePrefix(resolvedTable.getLogStreamName());
    }

    String continuationToken;
    do {
        final DescribeLogStreamsResult page = invoker.invoke(() -> awsLogs.describeLogStreams(cwRequest));
        for (LogStream stream : page.getLogStreams()) {
            // Every field is always written; the row only counts if all of them passed the constraints.
            blockWriter.writeRows((Block block, int rowNum) -> {
                boolean groupOk = block.setValue(LOG_GROUP_FIELD, rowNum, cwRequest.getLogGroupName());
                boolean streamOk = block.setValue(LOG_STREAM_FIELD, rowNum, stream.getLogStreamName());
                boolean sizeOk = block.setValue(LOG_STREAM_SIZE_FIELD, rowNum, stream.getStoredBytes());
                if (groupOk && streamOk && sizeOk) {
                    return 1;
                }
                return 0;
            });
        }
        continuationToken = page.getNextToken();
        cwRequest.setNextToken(continuationToken);
    } while (continuationToken != null && queryStatusChecker.isQueryRunning());
}
Also used : Block(com.amazonaws.athena.connector.lambda.data.Block) DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) LogStream(com.amazonaws.services.logs.model.LogStream) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult)

Example 2 with LogStream

use of com.amazonaws.services.logs.model.LogStream in project aws-athena-query-federation by awslabs.

the class CloudwatchTableResolver method loadLogStreams.

/**
 * Loads the requested LogStream as identified by the TableName.
 *
 * <p>A targeted lookup via {@code loadLogStream} is attempted first. If that yields nothing, every
 * LogStream in the LogGroup is paged through and compared case-insensitively against the requested
 * name.
 *
 * @param logGroup The properly cased schema name.
 * @param logStream The table name to validate.
 * @return The matching CloudwatchTableName; never null.
 * @throws IllegalArgumentException if no LogStream in the LogGroup matches the requested name.
 * @throws TimeoutException declared by this method; presumably raised by the invoker when calls to
 * Cloudwatch keep failing (confirm against the invoker's contract).
 * @note This method also primes the cache with other CloudwatchTableNames found along the way while scanning Cloudwatch.
 */
private CloudwatchTableName loadLogStreams(String logGroup, String logStream) throws TimeoutException {
    // As an optimization, try the targeted lookup first (meaning likely no casing issues)
    CloudwatchTableName result = loadLogStream(logGroup, logStream);
    if (result != null) {
        return result;
    }
    logger.info("loadLogStreams: Did not find a match for the table, falling back to LogGroup scan for  {}:{}", logGroup, logStream);
    DescribeLogStreamsRequest validateTableRequest = new DescribeLogStreamsRequest(logGroup);
    DescribeLogStreamsResult validateTableResult;
    do {
        validateTableResult = invoker.invoke(() -> awsLogs.describeLogStreams(validateTableRequest));
        for (LogStream nextStream : validateTableResult.getLogStreams()) {
            String logStreamName = nextStream.getLogStreamName();
            CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
            // Cache everything seen so later lookups for sibling streams can skip the scan.
            tableCache.put(nextCloudwatch.toTableName(), nextCloudwatch);
            // Compare against the REQUESTED name. Comparing the stream's name with itself would make
            // the very first stream of the LogGroup look like a match.
            if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(logStream)) {
                // We stop loading once we find the one we care about. This is an optimization that
                // attempts to exploit the fact that we likely access more recent logstreams first.
                logger.info("loadLogStreams: Matched {} for {}", nextCloudwatch, logStream);
                return nextCloudwatch;
            }
        }
        validateTableRequest.setNextToken(validateTableResult.getNextToken());
    } while (validateTableResult.getNextToken() != null);
    // We could not find a match
    throw new IllegalArgumentException("No such table " + logGroup + " " + logStream);
}
Also used : DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) LogStream(com.amazonaws.services.logs.model.LogStream) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult)

Example 3 with LogStream

use of com.amazonaws.services.logs.model.LogStream in project aws-athena-query-federation by awslabs.

the class CloudwatchTableResolver method loadLogStream.

/**
 * Optimization that attempts to load a specific LogStream as identified by the TableName.
 *
 * <p>The special ALL_LOG_STREAMS table is answered without calling Cloudwatch. Otherwise a single
 * DescribeLogStreams call is issued using the (possibly Lambda-pattern-substituted) table name as
 * a prefix, and the returned streams are compared case-insensitively against that name. Only the
 * first page of results is inspected.
 *
 * @param logGroup The properly cased schema name.
 * @param logStream The table name to validate.
 * @return The CloudwatchTableName or null if not found.
 * @throws TimeoutException declared by this method; presumably raised by the invoker when calls to
 * Cloudwatch keep failing (confirm against the invoker's contract).
 */
private CloudwatchTableName loadLogStream(String logGroup, String logStream) throws TimeoutException {
    if (ALL_LOG_STREAMS_TABLE.equalsIgnoreCase(logStream)) {
        return new CloudwatchTableName(logGroup, ALL_LOG_STREAMS_TABLE);
    }
    String effectiveTableName = logStream;
    if (effectiveTableName.contains(LAMBDA_PATTERN)) {
        logger.info("loadLogStream: Appears to be a lambda log_stream, substituting Lambda pattern {} for {}", LAMBDA_PATTERN, effectiveTableName);
        effectiveTableName = effectiveTableName.replace(LAMBDA_PATTERN, LAMBDA_ACTUAL_PATTERN);
    }
    // The lambda below needs an effectively final reference to the name we actually queried.
    final String requestedName = effectiveTableName;
    DescribeLogStreamsRequest request = new DescribeLogStreamsRequest(logGroup).withLogStreamNamePrefix(requestedName);
    DescribeLogStreamsResult result = invoker.invoke(() -> awsLogs.describeLogStreams(request));
    for (LogStream nextStream : result.getLogStreams()) {
        String logStreamName = nextStream.getLogStreamName();
        CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
        // A prefix query can return longer names (e.g. "table-10" for "table-1"), so the stream
        // must be compared with the name we asked for, not with itself.
        if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(requestedName)) {
            logger.info("loadLogStream: Matched {} for {}:{}", nextCloudwatch, logGroup, logStream);
            return nextCloudwatch;
        }
    }
    return null;
}
Also used : DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) LogStream(com.amazonaws.services.logs.model.LogStream) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult)

Example 4 with LogStream

use of com.amazonaws.services.logs.model.LogStream in project aws-athena-query-federation by awslabs.

the class CloudwatchMetadataHandlerTest method setUp.

/**
 * Builds the test fixture: stubs the Cloudwatch Logs client so that every DescribeLogStreams call
 * yields the streams "table-9" and "table-10" and every DescribeLogGroups call yields the groups
 * "schema-1" and "schema-20", then creates the handler under test and a fresh block allocator.
 */
@Before
public void setUp() throws Exception {
    // A new result object is produced for every invocation of the stubbed calls.
    when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class)))
        .thenAnswer((InvocationOnMock invocation) -> new DescribeLogStreamsResult().withLogStreams(
            new LogStream().withLogStreamName("table-9"),
            new LogStream().withLogStreamName("table-10")));

    when(mockAwsLogs.describeLogGroups(any(DescribeLogGroupsRequest.class)))
        .thenAnswer((InvocationOnMock invocation) -> new DescribeLogGroupsResult().withLogGroups(
            new LogGroup().withLogGroupName("schema-1"),
            new LogGroup().withLogGroupName("schema-20")));

    handler = new CloudwatchMetadataHandler(
        mockAwsLogs,
        new LocalKeyFactory(),
        mockSecretsManager,
        mockAthena,
        "spillBucket",
        "spillPrefix");
    allocator = new BlockAllocatorImpl();
}
Also used : DescribeLogGroupsResult(com.amazonaws.services.logs.model.DescribeLogGroupsResult) LogGroup(com.amazonaws.services.logs.model.LogGroup) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) InvocationOnMock(org.mockito.invocation.InvocationOnMock) DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) LogStream(com.amazonaws.services.logs.model.LogStream) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult) DescribeLogGroupsRequest(com.amazonaws.services.logs.model.DescribeLogGroupsRequest) LocalKeyFactory(com.amazonaws.athena.connector.lambda.security.LocalKeyFactory) Before(org.junit.Before)

Example 5 with LogStream

use of com.amazonaws.services.logs.model.LogStream in project aws-athena-query-federation by awslabs.

the class CloudwatchMetadataHandlerTest method doGetTableLayout.

/**
 * Verifies that a table layout request for the all_log_streams view, constrained to
 * log_stream = "table-10", pages through the stubbed DescribeLogStreams results (four calls in
 * total) and ends up with exactly one partition.
 */
@Test
public void doGetTableLayout() throws Exception {
    logger.info("doGetTableLayout - enter");
    when(mockAwsLogs.describeLogStreams(any(DescribeLogStreamsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        DescribeLogStreamsRequest describeRequest = (DescribeLogStreamsRequest) invocation.getArguments()[0];

        // A missing token means the first page (0); otherwise the token is the page index.
        String incomingToken = describeRequest.getNextToken();
        int page = (incomingToken == null) ? 0 : Integer.parseInt(incomingToken);
        // Pages below 3 carry data and a follow-up token; later pages are empty and terminal.
        boolean hasData = page < 3;

        List<LogStream> streams = new ArrayList<>();
        if (hasData) {
            for (int idx = page * 100; idx < 300; idx++) {
                LogStream stream = new LogStream();
                stream.setLogStreamName("table-" + idx);
                stream.setStoredBytes(idx * 1000L);
                streams.add(stream);
            }
        }

        DescribeLogStreamsResult response = new DescribeLogStreamsResult();
        response.withLogStreams(streams);
        if (hasData) {
            response.setNextToken(String.valueOf(page + 1));
        }
        return response;
    });

    // Constrain the layout to the single stream named "table-10".
    ValueSet logStreamConstraint = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false)
        .add("table-10")
        .build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put("log_stream", logStreamConstraint);

    Schema schema = SchemaBuilder.newBuilder().addStringField("log_stream").build();
    TableName requestedTable = new TableName("schema-1", "all_log_streams");
    GetTableLayoutRequest req = new GetTableLayoutRequest(
        identity,
        "queryId",
        "default",
        requestedTable,
        new Constraints(constraintsMap),
        schema,
        Collections.singleton("log_stream"));

    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);

    logger.info("doGetTableLayout - {}", res.getPartitions().getSchema());
    logger.info("doGetTableLayout - {}", res.getPartitions());
    assertTrue(res.getPartitions().getSchema().findField("log_stream") != null);
    assertTrue(res.getPartitions().getRowCount() == 1);
    verify(mockAwsLogs, times(4)).describeLogStreams(any(DescribeLogStreamsRequest.class));
    logger.info("doGetTableLayout - exit");
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) LogStream(com.amazonaws.services.logs.model.LogStream) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) DescribeLogStreamsRequest(com.amazonaws.services.logs.model.DescribeLogStreamsRequest) DescribeLogStreamsResult(com.amazonaws.services.logs.model.DescribeLogStreamsResult) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

DescribeLogStreamsRequest (com.amazonaws.services.logs.model.DescribeLogStreamsRequest)11 LogStream (com.amazonaws.services.logs.model.LogStream)11 DescribeLogStreamsResult (com.amazonaws.services.logs.model.DescribeLogStreamsResult)10 Test (org.junit.Test)4 InvocationOnMock (org.mockito.invocation.InvocationOnMock)4 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)3 DescribeLogGroupsRequest (com.amazonaws.services.logs.model.DescribeLogGroupsRequest)3 ArrayList (java.util.ArrayList)3 CreateLogStreamRequest (com.amazonaws.services.logs.model.CreateLogStreamRequest)2 DescribeLogGroupsResult (com.amazonaws.services.logs.model.DescribeLogGroupsResult)2 LogGroup (com.amazonaws.services.logs.model.LogGroup)2 HashSet (java.util.HashSet)2 Test (org.junit.jupiter.api.Test)2 Level (ch.qos.logback.classic.Level)1 PatternLayout (ch.qos.logback.classic.PatternLayout)1 ILoggingEvent (ch.qos.logback.classic.spi.ILoggingEvent)1 LoggingEvent (ch.qos.logback.classic.spi.LoggingEvent)1 Block (com.amazonaws.athena.connector.lambda.data.Block)1 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)1 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)1