Search in sources:

Example 1 with Row

use of com.amazonaws.services.timestreamquery.model.Row in project aws-athena-query-federation by awslabs.

From the class TestUtils, method makeMockQueryResult:

/**
 * Builds a Mockito mock of a Timestream {@link QueryResult} that emits pages of
 * generated rows matching the supplied Arrow schema.
 *
 * <p>Each call to {@code getRows()} produces a fresh page of up to 100 rows (one
 * {@link Datum} per schema field via {@code makeValue(...)}) and advances a shared
 * row counter. {@code getNextToken()} returns a non-null token until the counter
 * reaches {@code numRows}, signalling callers to stop paginating.
 *
 * @param schemaForRead the Arrow schema whose fields drive per-row Datum generation
 * @param numRows the total number of rows to emit across all pages
 * @return a mocked QueryResult suitable for exercising pagination logic in tests
 */
public static QueryResult makeMockQueryResult(Schema schemaForRead, int numRows) {
    QueryResult mockResult = mock(QueryResult.class);
    final AtomicLong nextToken = new AtomicLong(0);
    when(mockResult.getRows()).thenAnswer((Answer<List<Row>>) invocationOnMock -> {
        List<Row> rows = new ArrayList<>();
        // Cap each page at 100 rows but never exceed numRows in total; the
        // previous unconditional 100-row page could overshoot numRows on the
        // final page.
        for (int i = 0; i < 100 && nextToken.get() < numRows; i++) {
            nextToken.incrementAndGet();
            List<Datum> columnData = new ArrayList<>();
            for (Field nextField : schemaForRead.getFields()) {
                columnData.add(makeValue(nextField));
            }
            Row row = new Row();
            row.setData(columnData);
            rows.add(row);
        }
        return rows;
    });
    when(mockResult.getNextToken()).thenAnswer((Answer<String>) invocationOnMock -> {
        // Keep paginating until the requested number of rows has been emitted.
        if (nextToken.get() < numRows) {
            return String.valueOf(nextToken.get());
        }
        return null;
    });
    return mockResult;
}
Also used : QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Schema(org.apache.arrow.vector.types.pojo.Schema) FLOAT8(org.apache.arrow.vector.types.Types.MinorType.FLOAT8) Date(java.util.Date) Types(org.apache.arrow.vector.types.Types) SimpleDateFormat(java.text.SimpleDateFormat) HashMap(java.util.HashMap) Random(java.util.Random) TimeSeriesDataPoint(com.amazonaws.services.timestreamquery.model.TimeSeriesDataPoint) ArrayList(java.util.ArrayList) Answer(org.mockito.stubbing.Answer) Map(java.util.Map) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) FieldResolver(com.amazonaws.athena.connector.lambda.data.FieldResolver) ConstraintProjector(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintProjector) FieldVector(org.apache.arrow.vector.FieldVector) Datum(com.amazonaws.services.timestreamquery.model.Datum) Field(org.apache.arrow.vector.types.pojo.Field) Mockito.when(org.mockito.Mockito.when) Row(com.amazonaws.services.timestreamquery.model.Row) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) BlockUtils(com.amazonaws.athena.connector.lambda.data.BlockUtils) Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.mock(org.mockito.Mockito.mock) Field(org.apache.arrow.vector.types.pojo.Field) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) AtomicLong(java.util.concurrent.atomic.AtomicLong) ArrayList(java.util.ArrayList) List(java.util.List) Row(com.amazonaws.services.timestreamquery.model.Row)

Example 2 with Row

use of com.amazonaws.services.timestreamquery.model.Row in project aws-athena-query-federation by awslabs.

From the class TimestreamMetadataHandlerTest, method doGetTable:

@Test
public void doGetTable() throws Exception {
    logger.info("doGetTable - enter");
    when(mockGlue.getTable(any(com.amazonaws.services.glue.model.GetTableRequest.class))).thenReturn(mock(GetTableResult.class));
    when(mockTsQuery.query(any(QueryRequest.class))).thenAnswer((InvocationOnMock invocation) -> {
        QueryRequest describeRequest = invocation.getArgumentAt(0, QueryRequest.class);
        assertEquals("DESCRIBE \"default\".\"table1\"", describeRequest.getQueryString());
        // TODO: Add types here
        // Each DESCRIBE row is (column name, Timestream type, column kind).
        Row azRow = new Row().withData(new Datum().withScalarValue("availability_zone"), new Datum().withScalarValue("varchar"), new Datum().withScalarValue("dimension"));
        Row measureValueRow = new Row().withData(new Datum().withScalarValue("measure_value"), new Datum().withScalarValue("double"), new Datum().withScalarValue("measure_value"));
        Row measureNameRow = new Row().withData(new Datum().withScalarValue("measure_name"), new Datum().withScalarValue("varchar"), new Datum().withScalarValue("measure_name"));
        Row timeRow = new Row().withData(new Datum().withScalarValue("time"), new Datum().withScalarValue("timestamp"), new Datum().withScalarValue("timestamp"));
        return new QueryResult().withRows(azRow, measureValueRow, measureNameRow, timeRow);
    });
    GetTableRequest req = new GetTableRequest(identity, "query-id", "default", new TableName(defaultSchema, "table1"));
    GetTableResponse res = handler.doGetTable(allocator, req);
    logger.info("doGetTable - {}", res);
    // The four DESCRIBE rows above should map to four schema fields.
    assertEquals(4, res.getSchema().getFields().size());
    Field measureNameField = res.getSchema().findField("measure_name");
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(measureNameField.getType()));
    Field measureValueField = res.getSchema().findField("measure_value");
    assertEquals(Types.MinorType.FLOAT8, Types.getMinorTypeForArrowType(measureValueField.getType()));
    Field azField = res.getSchema().findField("availability_zone");
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(azField.getType()));
    Field timeField = res.getSchema().findField("time");
    assertEquals(Types.MinorType.DATEMILLI, Types.getMinorTypeForArrowType(timeField.getType()));
    logger.info("doGetTable - exit");
}
Also used : Datum(com.amazonaws.services.timestreamquery.model.Datum) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) ArrayList(java.util.ArrayList) GetTableRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableRequest) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Field(org.apache.arrow.vector.types.pojo.Field) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Row(com.amazonaws.services.timestreamquery.model.Row) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) Test(org.junit.Test)

Example 3 with Row

use of com.amazonaws.services.timestreamquery.model.Row in project aws-athena-query-federation by awslabs.

From the class TimestreamMetadataHandler, method doGetTable:

/**
 * Resolves the table's schema, preferring AWS Glue metadata and falling back to a
 * Timestream {@code DESCRIBE} query when Glue is unavailable or has no entry.
 *
 * @param blockAllocator allocator used by the parent Glue lookup
 * @param request identifies the catalog, schema, and table being resolved
 * @return the table name plus its resolved Arrow schema
 * @throws Exception if neither Glue nor Timestream can produce a schema
 */
@Override
public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest request) throws Exception {
    // Fixed: the original message had no '{}' placeholder, so the table name was
    // silently dropped from the log line.
    logger.info("doGetTable: enter, table[{}]", request.getTableName());
    Schema schema = null;
    try {
        if (glue != null) {
            schema = super.doGetTable(blockAllocator, request, TABLE_FILTER).getSchema();
            logger.info("doGetTable: Retrieved schema for table[{}] from AWS Glue.", request.getTableName());
        }
    } catch (RuntimeException ex) {
        // Glue failures are non-fatal: we fall through to the DESCRIBE query below.
        logger.warn("doGetTable: Unable to retrieve table[{}:{}] from AWS Glue.", request.getTableName().getSchemaName(), request.getTableName().getTableName(), ex);
    }
    if (schema == null) {
        TableName tableName = request.getTableName();
        String describeQuery = queryFactory.createDescribeTableQueryBuilder().withTablename(tableName.getTableName()).withDatabaseName(tableName.getSchemaName()).build();
        logger.info("doGetTable: Retrieving schema for table[{}] from TimeStream using describeQuery[{}].", request.getTableName(), describeQuery);
        QueryRequest queryRequest = new QueryRequest().withQueryString(describeQuery);
        SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
        do {
            QueryResult queryResult = tsQuery.query(queryRequest);
            for (Row next : queryResult.getRows()) {
                // DESCRIBE rows are expected as (column name, type, column kind).
                List<Datum> datum = next.getData();
                if (datum.size() != 3) {
                    throw new RuntimeException("Unexpected datum size " + datum.size() + " while getting schema from datum[" + datum.toString() + "]");
                }
                Field nextField = TimestreamSchemaUtils.makeField(datum.get(0).getScalarValue(), datum.get(1).getScalarValue());
                schemaBuilder.addField(nextField);
            }
            // Fixed: Timestream requires the original QueryString alongside NextToken
            // on follow-up pages; the previous code sent only the token.
            queryRequest = new QueryRequest().withQueryString(describeQuery).withNextToken(queryResult.getNextToken());
        } while (queryRequest.getNextToken() != null);
        schema = schemaBuilder.build();
    }
    return new GetTableResponse(request.getCatalogName(), request.getTableName(), schema);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Field(org.apache.arrow.vector.types.pojo.Field) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Datum(com.amazonaws.services.timestreamquery.model.Datum) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Row(com.amazonaws.services.timestreamquery.model.Row)

Example 4 with Row

use of com.amazonaws.services.timestreamquery.model.Row in project aws-athena-query-federation by awslabs.

From the class TimestreamRecordHandler, method buildRowWriter:

/**
 * Builds a GeneratedRowWriter whose per-field extractors pull positional values out
 * of a Timestream {@link Row} (passed as the extractor context) and convert each
 * scalar string to the field's Arrow minor type.
 *
 * @param request supplies the projection schema and constraints for the row writer
 * @return a row writer wired with one extractor per projected field
 * @throws RuntimeException if a field has an unsupported Arrow type
 */
private GeneratedRowWriter buildRowWriter(ReadRecordsRequest request) {
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder(request.getConstraints());
    int fieldNum = 0;
    for (Field nextField : request.getSchema().getFields()) {
        // Capture the column position; Timestream returns values positionally.
        int curFieldNum = fieldNum++;
        switch(Types.getMinorTypeForArrowType(nextField.getType())) {
            case VARCHAR:
                builder.withExtractor(nextField.getName(), (VarCharExtractor) (Object context, NullableVarCharHolder value) -> {
                    value.isSet = 1;
                    value.value = ((Row) context).getData().get(curFieldNum).getScalarValue();
                });
                break;
            case FLOAT8:
                builder.withExtractor(nextField.getName(), (Float8Extractor) (Object context, NullableFloat8Holder value) -> {
                    value.isSet = 1;
                    // parseDouble avoids the needless boxing of Double.valueOf(...).
                    value.value = Double.parseDouble(((Row) context).getData().get(curFieldNum).getScalarValue());
                });
                break;
            case BIT:
                builder.withExtractor(nextField.getName(), (BitExtractor) (Object context, NullableBitHolder value) -> {
                    value.isSet = 1;
                    // Simplified from 'Boolean.valueOf(...) == false ? 0 : 1'.
                    value.value = Boolean.parseBoolean(((Row) context).getData().get(curFieldNum).getScalarValue()) ? 1 : 0;
                });
                break;
            case BIGINT:
                builder.withExtractor(nextField.getName(), (BigIntExtractor) (Object context, NullableBigIntHolder value) -> {
                    value.isSet = 1;
                    // parseLong avoids the needless boxing of Long.valueOf(...).
                    value.value = Long.parseLong(((Row) context).getData().get(curFieldNum).getScalarValue());
                });
                break;
            case DATEMILLI:
                builder.withExtractor(nextField.getName(), (DateMilliExtractor) (Object context, NullableDateMilliHolder value) -> {
                    value.isSet = 1;
                    value.value = TIMESTAMP_FORMATTER.parse(((Row) context).getData().get(curFieldNum).getScalarValue()).getTime();
                });
                break;
            case LIST:
                // TODO: This presently only supports TimeSeries results but it is possible that customers may
                // generate LIST type results for other reasons when using VIEWs. For now this seems like an OK
                // compromise since it enables an important capability of TimeStream even if it doesn't enable arbitrary
                // complex types.
                buildTimeSeriesExtractor(builder, nextField, curFieldNum);
                break;
            default:
                throw new RuntimeException("Unsupported field type[" + nextField.getType() + "] for field[" + nextField.getName() + "]");
        }
    }
    return builder.build();
}
Also used : NullableBitHolder(org.apache.arrow.vector.holders.NullableBitHolder) Field(org.apache.arrow.vector.types.pojo.Field) NullableVarCharHolder(com.amazonaws.athena.connector.lambda.data.writers.holders.NullableVarCharHolder) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) NullableDateMilliHolder(org.apache.arrow.vector.holders.NullableDateMilliHolder) NullableFloat8Holder(org.apache.arrow.vector.holders.NullableFloat8Holder) Row(com.amazonaws.services.timestreamquery.model.Row) TimeSeriesDataPoint(com.amazonaws.services.timestreamquery.model.TimeSeriesDataPoint) NullableBigIntHolder(org.apache.arrow.vector.holders.NullableBigIntHolder)

Example 5 with Row

use of com.amazonaws.services.timestreamquery.model.Row in project aws-athena-query-federation by awslabs.

From the class TimestreamRecordHandler, method readWithConstraint:

/**
 * Scans TimeStream, paging through query results and spilling every returned
 * row via the supplied BlockSpiller.
 *
 * @see RecordHandler
 */
@Override
protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recordsRequest, QueryStatusChecker queryStatusChecker) {
    TableName table = recordsRequest.getTableName();
    SelectQueryBuilder selectBuilder = queryFactory.createSelectQueryBuilder(GlueMetadataHandler.VIEW_METADATA_FIELD);
    String query = selectBuilder
            .withDatabaseName(table.getSchemaName())
            .withTableName(table.getTableName())
            .withProjection(recordsRequest.getSchema())
            .withConjucts(recordsRequest.getConstraints())
            .build();
    logger.info("readWithConstraint: query[{}]", query);
    GeneratedRowWriter rowWriter = buildRowWriter(recordsRequest);
    String paginationToken = null;
    long rowCount = 0;
    do {
        // Re-issue the same query with the token from the previous page (null first time).
        QueryResult result = tsQuery.query(new QueryRequest().withQueryString(query).withNextToken(paginationToken));
        List<Row> page = result.getRows();
        if (page != null) {
            rowCount += page.size();
            for (Row row : page) {
                spiller.writeRows((Block block, int rowNum) -> rowWriter.writeRow(block, rowNum, row) ? 1 : 0);
            }
        }
        paginationToken = result.getNextToken();
        logger.info("readWithConstraint: numRows[{}]", rowCount);
    } while (paginationToken != null && !paginationToken.isEmpty());
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) GeneratedRowWriter(com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) SelectQueryBuilder(com.amazonaws.athena.connectors.timestream.query.SelectQueryBuilder) Block(com.amazonaws.athena.connector.lambda.data.Block) Row(com.amazonaws.services.timestreamquery.model.Row)

Aggregations

Row (com.amazonaws.services.timestreamquery.model.Row)8 Field (org.apache.arrow.vector.types.pojo.Field)5 Datum (com.amazonaws.services.timestreamquery.model.Datum)4 QueryResult (com.amazonaws.services.timestreamquery.model.QueryResult)4 ArrayList (java.util.ArrayList)4 GeneratedRowWriter (com.amazonaws.athena.connector.lambda.data.writers.GeneratedRowWriter)3 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)3 QueryRequest (com.amazonaws.services.timestreamquery.model.QueryRequest)3 TimeSeriesDataPoint (com.amazonaws.services.timestreamquery.model.TimeSeriesDataPoint)3 Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor)2 ConstraintProjector (com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintProjector)2 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)2 ColumnInfo (com.amazonaws.services.timestreamquery.model.ColumnInfo)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 FieldVector (org.apache.arrow.vector.FieldVector)2 Schema (org.apache.arrow.vector.types.pojo.Schema)2 Block (com.amazonaws.athena.connector.lambda.data.Block)1 BlockUtils (com.amazonaws.athena.connector.lambda.data.BlockUtils)1