Example 96 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class HiveMetadataHandler method getPartitions.

/**
 * Used to get the Hive partitions that must be read from the requested table in order to satisfy the requested predicate.
 *
 * @param blockWriter Used to write rows (hive partitions) into the Apache Arrow response.
 * @param getTableLayoutRequest Provides details of the catalog, database, and table being queried as well as any filter predicate.
 * @param queryStatusChecker A QueryStatusChecker that you can use to stop doing work for a query that has already terminated.
 * @throws SQLException A SQLException should be thrown for database connection failures, query syntax errors, and so on.
 */
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws SQLException {
    LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName());
    try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        Statement stmt = connection.createStatement();
        PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + getTableLayoutRequest.getTableName().getTableName().toUpperCase())) {
        boolean isTablePartitioned = false;
        ResultSet partitionResultset = stmt.executeQuery("show table extended like " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
        while (partitionResultset != null && partitionResultset.next()) {
            String partExists = partitionResultset.getString(1);
            if (partExists.toUpperCase().contains("PARTITIONED")) {
                String partValue = partExists.split(":")[1];
                if (partValue.toUpperCase().contains("TRUE")) {
                    isTablePartitioned = true;
                }
            }
        }
        LOGGER.debug("isTablePartitioned:" + isTablePartitioned);
        if (isTablePartitioned) {
            ResultSet partitionRs = stmt.executeQuery("show partitions " + getTableLayoutRequest.getTableName().getTableName().toUpperCase());
            Set<String> partition = new HashSet<>();
            while (partitionRs != null && partitionRs.next()) {
                partition.add(partitionRs.getString("Partition"));
            }
            if (!partition.isEmpty()) {
                Map<String, String> columnHashMap = getMetadataForGivenTable(psmt);
                addPartitions(partition, columnHashMap, blockWriter);
            }
        } else {
            blockWriter.writeRows((Block block, int rowNum) -> {
                block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, HiveConstants.ALL_PARTITIONS);
                return 1;
            });
        }
    }
}
Also used : PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) Block(com.amazonaws.athena.connector.lambda.data.Block) HashSet(java.util.HashSet)
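
The addPartitions helper called above is elided from this excerpt. As a rough sketch (hypothetical name and body, built only from the BlockWriter and Block calls already visible in this example), a minimal version could emit one row per discovered partition; the real helper also receives the column metadata map from getMetadataForGivenTable to interpret each partition spec:

private void addPartitionsSketch(Set<String> partitionNames, BlockWriter blockWriter)
{
    for (String partitionName : partitionNames) {
        blockWriter.writeRows((Block block, int rowNum) -> {
            // Write the raw partition spec into the partitions column.
            block.setValue(HiveConstants.BLOCK_PARTITION_COLUMN_NAME, rowNum, partitionName);
            // Tell the writer that one row was produced by this invocation.
            return 1;
        });
    }
}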

Example 97 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricSamplesSplits.

@Test
public void doGetMetricSamplesSplits() throws Exception {
    logger.info("doGetMetricSamplesSplits: enter");
    String namespaceFilter = "MyNameSpace";
    String statistic = "p90";
    int numMetrics = 10;
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        ListMetricsRequest request = invocation.getArgumentAt(0, ListMetricsRequest.class);
        // assert that the namespace filter was indeed pushed down
        assertEquals(namespaceFilter, request.getNamespace());
        String nextToken = (request.getNextToken() == null) ? "valid" : null;
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespaceFilter).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(nextToken).withMetrics(metrics);
    });
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(namespaceFilter).build());
    constraintsMap.put(STATISTIC_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(statistic).build());
    String continuationToken = null;
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName(defaultSchema, "metric_samples"), partitions, Collections.singletonList("partitionId"), new Constraints(constraintsMap), continuationToken);
    int numContinuations = 0;
    do {
        GetSplitsRequest req = new GetSplitsRequest(originalReq, continuationToken);
        logger.info("doGetMetricSamplesSplits: req[{}]", req);
        MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
        assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
        GetSplitsResponse response = (GetSplitsResponse) rawResponse;
        continuationToken = response.getContinuationToken();
        logger.info("doGetMetricSamplesSplits: continuationToken[{}] - numSplits[{}]", continuationToken, response.getSplits().size());
        assertEquals(3, response.getSplits().size());
        for (Split nextSplit : response.getSplits()) {
            assertNotNull(nextSplit.getProperty(SERIALIZED_METRIC_STATS_FIELD_NAME));
        }
        if (continuationToken != null) {
            numContinuations++;
        }
    } while (continuationToken != null);
    assertEquals(1, numContinuations);
    logger.info("doGetMetricSamplesSplits: exit");
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ListMetricsResult(com.amazonaws.services.cloudwatch.model.ListMetricsResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) ListMetricsRequest(com.amazonaws.services.cloudwatch.model.ListMetricsRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) Split(com.amazonaws.athena.connector.lambda.domain.Split) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)
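
The do/while loop in this test is the standard way a caller drives doGetSplits pagination: re-wrap the original request with the token from the previous response until the token comes back null. Distilled to its skeleton, using only the types already exercised above:

String token = null;
do {
    GetSplitsRequest req = new GetSplitsRequest(originalReq, token);
    GetSplitsResponse page = (GetSplitsResponse) handler.doGetSplits(allocator, req);
    // ... consume page.getSplits() ...
    token = page.getContinuationToken();
} while (token != null);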

Example 98 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class MetricsMetadataHandlerTest method doGetMetricSamplesSplitsEmptyMetrics.

@Test
public void doGetMetricSamplesSplitsEmptyMetrics() throws Exception {
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: enter");
    String namespace = "NameSpace";
    String invalidNamespaceFilter = "InvalidNameSpace";
    int numMetrics = 10;
    when(mockMetrics.listMetrics(any(ListMetricsRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        List<Metric> metrics = new ArrayList<>();
        for (int i = 0; i < numMetrics; i++) {
            metrics.add(new Metric().withNamespace(namespace).withMetricName("metric-" + i));
        }
        return new ListMetricsResult().withNextToken(null).withMetrics(metrics);
    });
    Schema schema = SchemaBuilder.newBuilder().addIntField("partitionId").build();
    Block partitions = allocator.createBlock(schema);
    BlockUtils.setValue(partitions.getFieldVector("partitionId"), 1, 1);
    partitions.setRowCount(1);
    Map<String, ValueSet> constraintsMap = new HashMap<>();
    constraintsMap.put(NAMESPACE_FIELD, EquatableValueSet.newBuilder(allocator, Types.MinorType.VARCHAR.getType(), true, false).add(invalidNamespaceFilter).build());
    GetSplitsRequest originalReq = new GetSplitsRequest(identity, "queryId", "catalog_name", new TableName(defaultSchema, "metric_samples"), partitions, Collections.singletonList("partitionId"), new Constraints(constraintsMap), null);
    GetSplitsRequest req = new GetSplitsRequest(originalReq, null);
    logger.info("doGetMetricSamplesSplitsEmptyMetrics: req[{}]", req);
    MetadataResponse rawResponse = handler.doGetSplits(allocator, req);
    assertEquals(MetadataRequestType.GET_SPLITS, rawResponse.getRequestType());
    GetSplitsResponse response = (GetSplitsResponse) rawResponse;
    assertEquals(0, response.getSplits().size());
    assertEquals(null, response.getContinuationToken());
}
Also used : GetSplitsRequest(com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest) HashMap(java.util.HashMap) Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) ListMetricsResult(com.amazonaws.services.cloudwatch.model.ListMetricsResult) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GetSplitsResponse(com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse) MetadataResponse(com.amazonaws.athena.connector.lambda.metadata.MetadataResponse) Block(com.amazonaws.athena.connector.lambda.data.Block) ListMetricsRequest(com.amazonaws.services.cloudwatch.model.ListMetricsRequest) Metric(com.amazonaws.services.cloudwatch.model.Metric) ValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet) EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) Test(org.junit.Test)

Example 99 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class CloudwatchRecordHandler method readWithConstraint.

/**
 * Scans CloudWatch Logs using the LogStream and optional timestamp filters.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) throws TimeoutException {
    String continuationToken = null;
    TableName tableName = recordsRequest.getTableName();
    Split split = recordsRequest.getSplit();
    invoker.setBlockSpiller(spiller);
    do {
        final String actualContinuationToken = continuationToken;
        GetLogEventsResult logEventsResult = invoker.invoke(() -> awsLogs.getLogEvents(pushDownConstraints(recordsRequest.getConstraints(), new GetLogEventsRequest().withLogGroupName(split.getProperty(LOG_GROUP_FIELD)).withLogStreamName(split.getProperty(LOG_STREAM_FIELD)).withNextToken(actualContinuationToken).withStartFromHead(true))));
        if (continuationToken == null || !continuationToken.equals(logEventsResult.getNextForwardToken())) {
            continuationToken = logEventsResult.getNextForwardToken();
        } else {
            continuationToken = null;
        }
        for (OutputLogEvent ole : logEventsResult.getEvents()) {
            spiller.writeRows((Block block, int rowNum) -> {
                boolean matched = true;
                matched &= block.offerValue(LOG_STREAM_FIELD, rowNum, split.getProperty(LOG_STREAM_FIELD));
                matched &= block.offerValue(LOG_TIME_FIELD, rowNum, ole.getTimestamp());
                matched &= block.offerValue(LOG_MSG_FIELD, rowNum, ole.getMessage());
                return matched ? 1 : 0;
            });
        }
        logger.info("readWithConstraint: LogGroup[{}] LogStream[{}] Continuation[{}] rows[{}]", tableName.getSchemaName(), tableName.getTableName(), continuationToken, logEventsResult.getEvents().size());
    } while (continuationToken != null && queryStatusChecker.isQueryRunning());
}
Also used : OutputLogEvent(com.amazonaws.services.logs.model.OutputLogEvent) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GetLogEventsResult(com.amazonaws.services.logs.model.GetLogEventsResult) Block(com.amazonaws.athena.connector.lambda.data.Block) GetLogEventsRequest(com.amazonaws.services.logs.model.GetLogEventsRequest) Split(com.amazonaws.athena.connector.lambda.domain.Split)
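
The pushDownConstraints call above is elided from this excerpt. A plausible sketch, assuming the timestamp filter arrives as a SortedRangeSet over LOG_TIME_FIELD (Range and SortedRangeSet live in com.amazonaws.athena.connector.lambda.domain.predicate) and relying on GetLogEventsRequest.setStartTime/setEndTime from the AWS SDK for Java v1:

private static GetLogEventsRequest pushDownConstraintsSketch(Constraints constraints, GetLogEventsRequest request)
{
    ValueSet timeConstraint = constraints.getSummary().get(LOG_TIME_FIELD);
    if (timeConstraint instanceof SortedRangeSet) {
        // Collapse the ValueSet to a single spanning range and push its bounds to
        // CloudWatch Logs so the service filters by timestamp server-side.
        Range span = ((SortedRangeSet) timeConstraint).getSpan();
        if (!span.getLow().isLowerUnbounded()) {
            request.setStartTime((Long) span.getLow().getValue());
        }
        if (!span.getHigh().isUpperUnbounded()) {
            request.setEndTime((Long) span.getHigh().getValue());
        }
    }
    return request;
}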

Example 100 with Block

use of com.amazonaws.athena.connector.lambda.data.Block in project aws-athena-query-federation by awslabs.

the class QueryUtils method makePredicate.

/**
 * Converts a single field constraint into a Document for use in a DocumentDB query.
 *
 * @param field The field for the given ValueSet constraint.
 * @param constraint The constraint to apply to the given field.
 * @return A Document describing the constraint for pushing down into DocumentDB.
 */
public static Document makePredicate(Field field, ValueSet constraint) {
    String name = field.getName();
    if (constraint.isNone()) {
        return documentOf(name, isNullPredicate());
    }
    if (constraint.isAll()) {
        return documentOf(name, isNotNullPredicate());
    }
    if (constraint.isNullAllowed()) {
        // TODO: support nulls mixed with discrete value constraints
        return null;
    }
    if (constraint instanceof EquatableValueSet) {
        Block block = ((EquatableValueSet) constraint).getValues();
        List<Object> singleValues = new ArrayList<>();
        FieldReader fieldReader = block.getFieldReaders().get(0);
        for (int i = 0; i < block.getRowCount(); i++) {
            Document nextEqVal = new Document();
            fieldReader.setPosition(i);
            Object value = fieldReader.readObject();
            nextEqVal.put(EQ_OP, convert(value));
            singleValues.add(nextEqVal);
        }
        return orPredicate(singleValues.stream().map(next -> new Document(name, next)).collect(toList()));
    }
    List<Object> singleValues = new ArrayList<>();
    List<Document> disjuncts = new ArrayList<>();
    for (Range range : constraint.getRanges().getOrderedRanges()) {
        if (range.isSingleValue()) {
            singleValues.add(convert(range.getSingleValue()));
        } else {
            Document rangeConjuncts = new Document();
            if (!range.getLow().isLowerUnbounded()) {
                switch(range.getLow().getBound()) {
                    case ABOVE:
                        rangeConjuncts.put(GT_OP, convert(range.getLow().getValue()));
                        break;
                    case EXACTLY:
                        rangeConjuncts.put(GTE_OP, convert(range.getLow().getValue()));
                        break;
                    case BELOW:
                        throw new IllegalArgumentException("Low Marker should never use BELOW bound: " + range);
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                }
            }
            if (!range.getHigh().isUpperUnbounded()) {
                switch(range.getHigh().getBound()) {
                    case ABOVE:
                        throw new IllegalArgumentException("High Marker should never use ABOVE bound: " + range);
                    case EXACTLY:
                        rangeConjuncts.put(LTE_OP, convert(range.getHigh().getValue()));
                        break;
                    case BELOW:
                        rangeConjuncts.put(LT_OP, convert(range.getHigh().getValue()));
                        break;
                    default:
                        throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                }
            }
            // If rangeConjuncts is empty, then the range was ALL, which should already have been checked for
            verify(!rangeConjuncts.isEmpty());
            disjuncts.add(rangeConjuncts);
        }
    }
    // Add back all of the possible single values either as an equality or an IN predicate
    if (singleValues.size() == 1) {
        disjuncts.add(documentOf(EQ_OP, singleValues.get(0)));
    } else if (singleValues.size() > 1) {
        disjuncts.add(documentOf(IN_OP, singleValues));
    }
    return orPredicate(disjuncts.stream().map(disjunct -> new Document(name, disjunct)).collect(toList()));
}
Also used : EquatableValueSet(com.amazonaws.athena.connector.lambda.domain.predicate.EquatableValueSet) ArrayList(java.util.ArrayList) Document(org.bson.Document) Range(com.amazonaws.athena.connector.lambda.domain.predicate.Range) Block(com.amazonaws.athena.connector.lambda.data.Block) FieldReader(org.apache.arrow.vector.complex.reader.FieldReader)
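
To make the translation concrete, a hypothetical worked example: assuming the operator constants carry the usual MongoDB names (GT_OP = "$gt", LTE_OP = "$lte", IN_OP = "$in") and orPredicate wraps its arguments in "$or", a constraint on field x equivalent to (3 < x <= 10) OR x IN (20, 30) would produce a filter shaped like this (built with org.bson.Document and java.util.Arrays):

// One disjunct per bounded range, plus a single $in disjunct that gathers the
// discrete single values (a bare $eq would be used if there were exactly one).
Document expected = new Document("$or", Arrays.asList(
        new Document("x", new Document("$gt", 3).append("$lte", 10)),
        new Document("x", new Document("$in", Arrays.asList(20, 30)))));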

Aggregations

Block (com.amazonaws.athena.connector.lambda.data.Block): 113 usages
Test (org.junit.Test): 39 usages
HashMap (java.util.HashMap): 35 usages
Schema (org.apache.arrow.vector.types.pojo.Schema): 35 usages
Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints): 32 usages
Split (com.amazonaws.athena.connector.lambda.domain.Split): 31 usages
GetSplitsResponse (com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse): 28 usages
FieldReader (org.apache.arrow.vector.complex.reader.FieldReader): 28 usages
TableName (com.amazonaws.athena.connector.lambda.domain.TableName): 27 usages
SpillLocation (com.amazonaws.athena.connector.lambda.domain.spill.SpillLocation): 23 usages
HashSet (java.util.HashSet): 23 usages
ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet): 20 usages
Field (org.apache.arrow.vector.types.pojo.Field): 17 usages
GetSplitsRequest (com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest): 13 usages
PreparedStatement (java.sql.PreparedStatement): 13 usages
ResultSet (java.sql.ResultSet): 13 usages
ArrayList (java.util.ArrayList): 13 usages
MetadataResponse (com.amazonaws.athena.connector.lambda.metadata.MetadataResponse): 12 usages
Connection (java.sql.Connection): 12 usages
ReadRecordsRequest (com.amazonaws.athena.connector.lambda.records.ReadRecordsRequest): 11 usages