Search in sources :

Example 86 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class Ec2TableProvider method readWithConstraint.

/**
 * Pages through DescribeInstances on the AWS EC2 client and hands every instance of every returned
 * reservation to instanceToRow. If the query constrains instance_id to exactly one value, that id is
 * pushed down to EC2 on the request.
 *
 * Paging stops when EC2 returns no next token or the query is no longer running.
 *
 * @see TableProvider
 */
@Override
public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    DescribeInstancesRequest request = new DescribeInstancesRequest();

    // Push the instance id down only when the predicate pins it to a single value.
    ValueSet instanceIdFilter = recordsRequest.getConstraints().getSummary().get("instance_id");
    if (instanceIdFilter != null && instanceIdFilter.isSingleValue()) {
        String instanceId = instanceIdFilter.getSingleValue().toString();
        request.setInstanceIds(Collections.singletonList(instanceId));
    }

    String nextToken;
    do {
        DescribeInstancesResult page = ec2.describeInstances(request);
        for (Reservation reservation : page.getReservations()) {
            for (Instance instance : reservation.getInstances()) {
                instanceToRow(instance, spiller);
            }
        }
        // Carry the pagination token forward for the next call.
        nextToken = page.getNextToken();
        request.setNextToken(nextToken);
    } while (nextToken != null && queryStatusChecker.isQueryRunning());
}
Also used : DescribeInstancesResult(com.amazonaws.services.ec2.model.DescribeInstancesResult) Reservation(com.amazonaws.services.ec2.model.Reservation) Instance(com.amazonaws.services.ec2.model.Instance) DescribeInstancesRequest(com.amazonaws.services.ec2.model.DescribeInstancesRequest) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 87 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class ImagesTableProvider method readWithConstraint.

/**
 * Calls DescribeImages on the AWS EC2 client and hands each returned image to instanceToRow, pushing
 * down a single-valued 'id' or 'owner' predicate to EC2 when one is present.
 *
 * Filter precedence: a single 'id' value, otherwise a single 'owner' value, otherwise DEFAULT_OWNER.
 *
 * @note Because of the large number of public AMIs we also support using a default 'owner' filter if your query doesn't
 * filter on owner itself. You can set this using an env variable on your Lambda function defined by DEFAULT_OWNER_ENV.
 * @throws RuntimeException if no id/owner filter can be applied and DEFAULT_OWNER is null, or if EC2 returns
 * more than MAX_IMAGES images.
 * @see TableProvider
 */
@Override
public void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    DescribeImagesRequest request = new DescribeImagesRequest();
    ValueSet idConstraint = recordsRequest.getConstraints().getSummary().get("id");
    ValueSet ownerConstraint = recordsRequest.getConstraints().getSummary().get("owner");
    if (idConstraint != null && idConstraint.isSingleValue()) {
        request.setImageIds(Collections.singletonList(idConstraint.getSingleValue().toString()));
    } else if (ownerConstraint != null && ownerConstraint.isSingleValue()) {
        request.setOwners(Collections.singletonList(ownerConstraint.getSingleValue().toString()));
    } else if (DEFAULT_OWNER != null) {
        request.setOwners(Collections.singletonList(DEFAULT_OWNER));
    } else {
        // The original concatenation dropped the space between "owner" and "in".
        throw new RuntimeException("A default owner account must be set or the query must have owner in the where clause with exactly 1 value otherwise results may be too big.");
    }
    DescribeImagesResult response = ec2.describeImages(request);
    int count = 0;
    for (Image next : response.getImages()) {
        // Increment before comparing so that at most MAX_IMAGES images are accepted; the previous
        // post-increment comparison let MAX_IMAGES + 1 images through before failing.
        if (++count > MAX_IMAGES) {
            throw new RuntimeException("Too many images returned, add an owner or id filter.");
        }
        instanceToRow(next, spiller);
    }
}
Also used : DescribeImagesResult(com.amazonaws.services.ec2.model.DescribeImagesResult) DescribeImagesRequest(com.amazonaws.services.ec2.model.DescribeImagesRequest) Image(com.amazonaws.services.ec2.model.Image) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)

Example 88 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class ImpalaRecordHandlerTest method buildSplitSql.

/**
 * Checks that buildSplitSql returns the PreparedStatement produced by the mocked connection when the
 * constraints carry a single-value set on the DATEDAY column testCol2 and the split reports partition p0.
 */
@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Collections.singletonMap("partition", "p0"));
    Mockito.when(split.getProperty(Mockito.eq("partition"))).thenReturn("p0");
    // NOTE(review): range1a, range1b and valueSet1 are stubbed but never added to the constraint summary
    // below, so they do not influence this test — confirm whether a "testCol1" constraint was intended.
    Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1a.isSingleValue()).thenReturn(true);
    Mockito.when(range1a.getLow().getValue()).thenReturn(1);
    Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1b.isSingleValue()).thenReturn(true);
    Mockito.when(range1b.getLow().getValue()).thenReturn(2);
    ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
    // Date.getTime() yields epoch milliseconds, so the conversion must start from MILLISECONDS;
    // TimeUnit.DAYS.toDays(...) is an identity conversion and left the value in milliseconds.
    final long dateDays = TimeUnit.MILLISECONDS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet2 = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol2", valueSet2).build());
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.impalaRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) PreparedStatement(java.sql.PreparedStatement) Split(com.amazonaws.athena.connector.lambda.domain.Split) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)

Example 89 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricSamplesSplits.

/**
 * Drives doGetSplits for the metric_samples table against a stubbed listMetrics that serves two pages of
 * ten metrics, and checks that every response carries three splits with serialized metric stats and that
 * exactly one continuation token is handed back.
 */
@Test
public void doGetMetricSamplesSplits() throws Exception {
    logger.info("doGetMetricSamplesSplits: enter");

    final String namespaceFilter = "MyNameSpace";
    final String statistic = "p90";
    final int numMetrics = 10;

    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        ListMetricsRequest listRequest = invocation.getArgumentAt(0, ListMetricsRequest.class);

        // assert that the namespace filter was indeed pushed down
        assertEquals(namespaceFilter, listRequest.getNamespace());

        List<Metric> page = new ArrayList<>();
        int index = 0;
        while (index < numMetrics) {
            page.add(new Metric().withNamespace(namespaceFilter).withMetricName("metric-" + index));
            index++;
        }

        // First call (no token on the request) advertises another page; the follow-up call ends paging.
        String nextToken = (listRequest.getNextToken() == null) ? "valid" : null;
        return new ListMetricsResult().withNextToken(nextToken).withMetrics(page);
    });

    Schema partitionSchema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(partitionSchema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);

    ValueSet namespaceValues = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(namespaceFilter).build();
    ValueSet statisticValues = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(statistic).build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, namespaceValues);
    constraintsMap.put(STATISTIC_FIELD, statisticValues);

    String continuationToken = null;
    TableName metricSamplesTable = new TableName(defaultSchema, "metric_samples");
    List<String> partitionColumns = Collections.singletonList("partitionId");
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", metricSamplesTable, partitions, partitionColumns, new Constraints(constraintsMap), continuationToken);

    int continuationsSeen = 0;
    boolean morePages = true;
    while (morePages) {
        GetSplitsRequest pageRequest = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetMetricSamplesSplits: req[{}]", pageRequest);

        MetadataResponse rawResponse = handler.doGetSplits(allocator, pageRequest);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

        GetSplitsResponse splitsResponse = (GetSplitsResponse) rawResponse;
        continuationToken = splitsResponse.getContinuationToken();
        logger.info("doGetMetricSamplesSplits: continuationToken[{}] - numSplits[{}]", continuationToken, splitsResponse.getSplits().size());

        assertEquals(3, splitsResponse.getSplits().size());
        for (Split split : splitsResponse.getSplits()) {
            assertNotNull(split.getProperty(SERIALIZED_METRIC_STATS_FIELD_NAME));
        }

        morePages = continuationToken != null;
        if (morePages) {
            continuationsSeen++;
        }
    }

    assertEquals(1, continuationsSeen);
    logger.info("doGetMetricSamplesSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ListMetricsResult(com.amazonaws.services.cloudwatch.model.ListMetricsResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) ListMetricsRequest(com.amazonaws.services.cloudwatch.model.ListMetricsRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 90 with ValueSet

use of com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricSamplesSplitsEmptyMetrics.

/**
 * Drives doGetSplits for the metric_samples table with a namespace constraint ("InvalidNameSpace") that
 * differs from the namespace of the stubbed metrics ("NameSpace"), and checks that the response carries
 * no splits and no continuation token.
 */
@Test
public void doGetMetricSamplesSplitsEmptyMetrics() throws Exception {
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: enter");

    final String namespace = "NameSpace";
    final String invalidNamespaceFilter = "InvalidNameSpace";
    final int numMetrics = 10;

    // Stubbed listMetrics always answers with a single, final page of metrics in "NameSpace".
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        List<Metric> page = new ArrayList<>();
        int index = 0;
        while (index < numMetrics) {
            page.add(new Metric().withNamespace(namespace).withMetricName("metric-" + index));
            index++;
        }
        return new ListMetricsResult().withNextToken(null).withMetrics(page);
    });

    Schema partitionSchema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(partitionSchema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);

    ValueSet namespaceValues = EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(invalidNamespaceFilter).build();
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, namespaceValues);

    TableName metricSamplesTable = new TableName(defaultSchema, "metric_samples");
    List<String> partitionColumns = Collections.singletonList("partitionId");
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", metricSamplesTable, partitions, partitionColumns, new Constraints(constraintsMap), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, null);
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: req[{}]", req);

    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());

    GetSplitsResponse splitsResponse = (GetSplitsResponse) rawResponse;
    assertEquals(0, splitsResponse.getSplits().size());
    assertEquals(null, splitsResponse.getContinuationToken());
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ListMetricsResult(com.amazonaws.services.cloudwatch.model.ListMetricsResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) ListMetricsRequest(com.amazonaws.services.cloudwatch.model.ListMetricsRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Aggregations

ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 104; Test (org.junit.Test): 66; Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 63; HashMap (java.util.HashMap): 48; TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 47; Schema (org.apache.arrow.vector.types.pojo.Schema): 37; Split (com.amazonaws.athena.connector.lambda.domain.Split): 31; Range (com.amazonaws.athena.connector.lambda.domain.predicate.Range): 27; ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 27; EquatableValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet): 26; ArrayList (java.util.ArrayList): 25; Matchers.anyString (org.mockito.Matchers.anyString): 25; RecordResponse (com.amazonaws.athena.connector.lambda.records.RecordResponse): 24; Block (com.amazonaws.athena.connector.lambda.data.Block): 23; S3SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.S3SpillLocation): 21; RemoteReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.RemoteReadRecordsResponse): 18; SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder): 17; ReadRecordsResponse (com.amazonaws.athena.connector.lambda.records.ReadRecordsResponse): 17; InvocationOnMock (org.mockito.invocation.InvocationOnMock): 17; BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl): 13