Search in sources :

Example 21 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SqlServerMetadataHandlerTest method doGetTableLayoutWithNoPartitions.

// Drives doGetTableLayout with a mocked partition query whose only row is empty and a mocked
// row-count query returning 0, then checks the partition block, its schema, the table name and
// the parameter bound on the partition statement.
@Test
public void doGetTableLayoutWithNoPartitions() throws Exception {
    BlockAllocator allocator = new BlockAllocatorImpl();
    Constraints mockedConstraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");

    Schema partitionSchema = this.sqlServerMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionColumnNames = partitionSchema.getFields().stream()
            .map(Field::getName)
            .collect(Collectors.toSet());
    GetTableLayoutRequest layoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, mockedConstraints, partitionSchema, partitionColumnNames);

    // Partition lookup statement: a single PARTITION_NUMBER column and one empty row.
    PreparedStatement partitionStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(partitionStatement);
    String[] partitionColumns = { sqlServerMetadataHandler.PARTITION_NUMBER };
    int[] partitionColumnTypes = { Types.VARCHAR };
    Object[][] partitionRows = { {} };
    ResultSet partitionResultSet = mockResultSet(partitionColumns, partitionColumnTypes, partitionRows, new AtomicInteger(-1));
    Mockito.when(partitionStatement.executeQuery()).thenReturn(partitionResultSet);

    // Row count statement: one ROW_COUNT row holding 0.
    PreparedStatement rowCountStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.ROW_COUNT_QUERY)).thenReturn(rowCountStatement);
    ResultSet rowCountResultSet = mockResultSet(new String[] { "ROW_COUNT" }, new int[] { Types.INTEGER }, new Object[][] { { 0 } }, new AtomicInteger(-1));
    Mockito.when(rowCountStatement.executeQuery()).thenReturn(rowCountResultSet);

    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);

    GetTableLayoutResponse layoutResponse = this.sqlServerMetadataHandler.doGetTableLayout(allocator, layoutRequest);

    // The response is expected to carry exactly as many partition rows as the mocked result set.
    int partitionRowCount = layoutResponse.getPartitions().getRowCount();
    Assert.assertEquals(partitionRows.length, partitionRowCount);

    List<String> renderedRows = new ArrayList<>();
    for (int row = 0; row < partitionRowCount; row++) {
        renderedRows.add(BlockUtils.rowToString(layoutResponse.getPartitions(), row));
    }
    Assert.assertEquals(Arrays.asList("[PARTITION_NUMBER : 0]"), renderedRows);

    Schema expectedSchema = SchemaBuilder.newBuilder()
            .addField(FieldBuilder.newBuilder(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build())
            .build();
    Assert.assertEquals(expectedSchema, layoutResponse.getPartitions().getSchema());
    Assert.assertEquals(tableName, layoutResponse.getTableName());

    // The partition query must have been bound once with "<schema>.<table>".
    TableName requestedTable = layoutRequest.getTableName();
    Mockito.verify(partitionStatement, Mockito.times(1)).setString(1, requestedTable.getSchemaName() + "." + requestedTable.getTableName());
}
Also used : Schema(org.apache.arrow.vector.types.pojo.Schema) ArrayList(java.util.ArrayList) PreparedStatement(java.sql.PreparedStatement) TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Constraints(com.amazonaws.athena.connector.lambda.domain.predicate.Constraints) GetTableLayoutResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BlockAllocator(com.amazonaws.athena.connector.lambda.data.BlockAllocator) GetTableLayoutRequest(com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest) ResultSet(java.sql.ResultSet) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Test(org.junit.Test)

Example 22 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class SynapseMetadataHandler method getSchema.

/**
 * Builds the Arrow schema for a table. Each column's Arrow type is first derived from the JDBC
 * column metadata and then overridden for selected native data types, which are looked up by
 * column name through a query against SYS.COLUMNS / SYS.TYPES.
 *
 * @param jdbcConnection connection supplying the catalog name and the database metadata
 * @param tableName schema and table whose columns are read
 * @param partitionSchema fields appended after the table's own columns
 * @return schema containing every mapped column followed by the partition fields
 * @throws SQLException if any JDBC call fails
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    LOGGER.info("Inside getSchema");
    String dataTypeQuery = "SELECT C.NAME AS COLUMN_NAME, TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE "
            + "FROM SYS.COLUMNS C "
            + "JOIN SYS.TYPES T "
            + "ON C.USER_TYPE_ID=T.USER_TYPE_ID "
            + "WHERE C.OBJECT_ID=OBJECT_ID(?)";
    HashMap<String, String> nativeTypeByColumn = new HashMap<>();
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    boolean anyColumnMapped = false;
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
        Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
        PreparedStatement stmt = connection.prepareStatement(dataTypeQuery)) {
        // Collect the native data type of every column, keyed by trimmed column name.
        stmt.setString(1, tableName.getSchemaName() + "." + tableName.getTableName());
        try (ResultSet dataTypeResultSet = stmt.executeQuery()) {
            while (dataTypeResultSet.next()) {
                String nativeType = dataTypeResultSet.getString("DATA_TYPE");
                String nativeColumn = dataTypeResultSet.getString("COLUMN_NAME");
                nativeTypeByColumn.put(nativeColumn.trim(), nativeType.trim());
            }
        }

        while (resultSet.next()) {
            ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
            String columnName = resultSet.getString("COLUMN_NAME");
            String dataType = nativeTypeByColumn.get(columnName);
            LOGGER.debug("columnName: " + columnName);
            LOGGER.debug("dataType: " + dataType);

            // The native type names below are mutually exclusive, so at most one override applies.
            if ("date".equalsIgnoreCase(dataType)) {
                // date is not mapped by the framework by default; use DATEDAY.
                columnType = Types.MinorType.DATEDAY.getType();
            } else if ("bit".equalsIgnoreCase(dataType)) {
                // BIT would surface 0/1 as false/true; TINYINT keeps the numeric value.
                columnType = Types.MinorType.TINYINT.getType();
            } else if ("tinyint".equalsIgnoreCase(dataType)) {
                // SQL Server TINYINT spans 0 to 255, which a signed Arrow TINYINT cannot hold.
                columnType = Types.MinorType.SMALLINT.getType();
            } else if ("numeric".equalsIgnoreCase(dataType) || "smallmoney".equalsIgnoreCase(dataType)) {
                // FLOAT8 keeps the fractional part (e.g. 123.45 would otherwise show as 123).
                columnType = Types.MinorType.FLOAT8.getType();
            } else if ("datetime".equalsIgnoreCase(dataType)
                    || "datetime2".equalsIgnoreCase(dataType)
                    || "smalldatetime".equalsIgnoreCase(dataType)
                    || "datetimeoffset".equalsIgnoreCase(dataType)) {
                // Date-time types are not mapped by the framework by default; use DATEMILLI.
                columnType = Types.MinorType.DATEMILLI.getType();
            }

            // Anything unmapped or unsupported falls back to VARCHAR.
            if (columnType == null) {
                columnType = Types.MinorType.VARCHAR.getType();
            }
            if (!SupportedTypes.isSupported(columnType)) {
                columnType = Types.MinorType.VARCHAR.getType();
            }
            LOGGER.debug("columnType: " + columnType);

            if (SupportedTypes.isSupported(columnType)) {
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                anyColumnMapped = true;
            } else {
                LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
            }
        }

        if (!anyColumnMapped) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }

        // Partition columns go after the table's own columns.
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        return schemaBuilder.build();
    }
}
Also used : HashMap(java.util.HashMap) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) Connection(java.sql.Connection) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) PreparedStatement(java.sql.PreparedStatement)

Example 23 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class TeradataMetadataHandler method getSchema.

/**
 * Builds the Arrow schema for a table from its JDBC column metadata, then appends the
 * partition fields. Columns whose type is missing or unsupported are added as VARCHAR.
 *
 * @param jdbcConnection connection supplying the catalog name and the database metadata
 * @param tableName schema and table whose columns are read
 * @param partitionSchema fields appended after the table's own columns
 * @return schema containing every table column followed by the partition fields
 * @throws SQLException if any JDBC call fails
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) {
        boolean found = false;
        while (resultSet.next()) {
            ArrowType columnType = toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
            String columnName = resultSet.getString("COLUMN_NAME");
            LOGGER.info("Column Name: " + columnName);
            // columnType is null-checked below, so it must not be dereferenced unguarded here;
            // otherwise an unmapped column would throw instead of falling back to VARCHAR.
            LOGGER.info("Column Type: " + (columnType == null ? null : columnType.getTypeID()));
            /**
             * Convert decimal into BigInt
             */
            if (columnType != null && columnType.getTypeID().equals(ArrowType.ArrowTypeID.Decimal)) {
                // NOTE(review): this tests for the character '0' anywhere in the first two
                // comma-separated pieces of the type's string form, so values such as 10 or 20
                // also match. Presumably the intent is "precision or scale equals 0" - confirm
                // before changing.
                String[] data = columnType.toString().split(",");
                if (data[0].contains("0") || data[1].contains("0")) {
                    columnType = org.apache.arrow.vector.types.Types.MinorType.BIGINT.getType();
                }
            }
            if (columnType != null && SupportedTypes.isSupported(columnType)) {
                if (columnType instanceof ArrowType.List) {
                    schemaBuilder.addListField(columnName, getArrayArrowTypeFromTypeName(resultSet.getString("TYPE_NAME"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS")));
                } else {
                    // The mapped type is kept as-is in this branch; nothing defaults to VARCHAR here.
                    LOGGER.info("getSchema: Mapped column[" + columnName + "] to supported type " + columnType + ".");
                    schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                }
            } else {
                // Default to VARCHAR ArrowType
                LOGGER.info("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType + " - defaulting type to VARCHAR.");
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, new ArrowType.Utf8()).build());
            }
            found = true;
        }
        if (!found) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        // add partition columns
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        return schemaBuilder.build();
    }
}
Also used : SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) ResultSet(java.sql.ResultSet) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) List(java.util.List)

Example 24 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class TPCDSRecordHandlerTest method setUp.

// Prepares the fixture: picks the "customer" base table, derives the read schema from its
// columns, and backs the mocked S3 client with an in-memory list so that every putObject
// stores a payload and every getObject hands back (and removes) the oldest stored one.
@Before
public void setUp() throws Exception {
    for (Table candidate : Table.getBaseTables()) {
        if (candidate.getName().equals("customer")) {
            table = candidate;
        }
    }

    SchemaBuilder readSchemaBuilder = SchemaBuilder.newBuilder();
    for (Column column : table.getColumns()) {
        readSchemaBuilder.addField(TPCDSUtils.convertColumn(column));
    }
    schemaForRead = readSchemaBuilder.build();

    mockS3Storage = new ArrayList<>();
    allocator = new BlockAllocatorImpl();
    handler = new TPCDSRecordHandler(mockS3, mockSecretsManager, mockAthena);
    spillReader = new S3BlockSpillReader(mockS3, allocator);

    // putObject: capture the uploaded stream (third argument) into the in-memory store.
    when(mockS3.putObject(anyObject(), anyObject(), anyObject(), anyObject())).thenAnswer(invocation -> {
        synchronized (mockS3Storage) {
            InputStream uploaded = (InputStream) invocation.getArguments()[2];
            ByteHolder holder = new ByteHolder();
            holder.setBytes(ByteStreams.toByteArray(uploaded));
            mockS3Storage.add(holder);
            return mock(PutObjectResult.class);
        }
    });

    // getObject: serve stored payloads in insertion order, consuming each one.
    when(mockS3.getObject(anyString(), anyString())).thenAnswer(invocation -> {
        synchronized (mockS3Storage) {
            S3Object s3Object = mock(S3Object.class);
            ByteHolder holder = mockS3Storage.remove(0);
            ByteArrayInputStream content = new ByteArrayInputStream(holder.getBytes());
            when(s3Object.getObjectContent()).thenReturn(new S3ObjectInputStream(content, null));
            return s3Object;
        }
    });
}
Also used : Table(com.teradata.tpcds.Table) ByteArrayInputStream(java.io.ByteArrayInputStream) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) InputStream(java.io.InputStream) S3BlockSpillReader(com.amazonaws.athena.connector.lambda.data.S3BlockSpillReader) S3ObjectInputStream(com.amazonaws.services.s3.model.S3ObjectInputStream) Column(com.teradata.tpcds.column.Column) BlockAllocatorImpl(com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl) ByteArrayInputStream(java.io.ByteArrayInputStream) InvocationOnMock(org.mockito.invocation.InvocationOnMock) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) S3Object(com.amazonaws.services.s3.model.S3Object) Before(org.junit.Before)

Example 25 with SchemaBuilder

use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.

the class TimestreamMetadataHandler method doGetTable.

/**
 * Resolves the schema of the requested table. When a Glue client is configured, the schema is
 * read from Glue first; if that is unavailable or fails with a RuntimeException, the schema is
 * built from the rows returned by a describe-table query against Timestream.
 *
 * @param blockAllocator allocator forwarded to the Glue lookup
 * @param request identifies the catalog and table to describe
 * @return response carrying the catalog name, table name and resolved schema
 * @throws Exception if the Glue lookup throws a checked exception or the Timestream query fails
 */
@Override
public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest request) throws Exception {
    // The message needs a placeholder, otherwise the table name argument is silently dropped.
    logger.info("doGetTable: enter - {}", request.getTableName());
    Schema schema = null;
    try {
        if (glue != null) {
            schema = super.doGetTable(blockAllocator, request, TABLE_FILTER).getSchema();
            logger.info("doGetTable: Retrieved schema for table[{}] from AWS Glue.", request.getTableName());
        }
    } catch (RuntimeException ex) {
        // Best effort: a Glue failure falls through to the Timestream describe query below.
        logger.warn("doGetTable: Unable to retrieve table[{}:{}] from AWS Glue.", request.getTableName().getSchemaName(), request.getTableName().getTableName(), ex);
    }
    if (schema == null) {
        TableName tableName = request.getTableName();
        String describeQuery = queryFactory.createDescribeTableQueryBuilder().withTablename(tableName.getTableName()).withDatabaseName(tableName.getSchemaName()).build();
        logger.info("doGetTable: Retrieving schema for table[{}] from TimeStream using describeQuery[{}].", request.getTableName(), describeQuery);
        QueryRequest queryRequest = new QueryRequest().withQueryString(describeQuery);
        SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
        do {
            QueryResult queryResult = tsQuery.query(queryRequest);
            for (Row next : queryResult.getRows()) {
                List<Datum> datum = next.getData();
                if (datum.size() != 3) {
                    throw new RuntimeException("Unexpected datum size " + datum.size() + " while getting schema from datum[" + datum.toString() + "]");
                }
                // Only the first two values of each row are used to build the field.
                Field nextField = TimestreamSchemaUtils.makeField(datum.get(0).getScalarValue(), datum.get(1).getScalarValue());
                schemaBuilder.addField(nextField);
            }
            // Follow-up pages must repeat the query string alongside the pagination token;
            // a request carrying only the token has no query to continue.
            queryRequest = new QueryRequest().withQueryString(describeQuery).withNextToken(queryResult.getNextToken());
        } while (queryRequest.getNextToken() != null);
        schema = schemaBuilder.build();
    }
    return new GetTableResponse(request.getCatalogName(), request.getTableName(), schema);
}
Also used : TableName(com.amazonaws.athena.connector.lambda.domain.TableName) Field(org.apache.arrow.vector.types.pojo.Field) QueryResult(com.amazonaws.services.timestreamquery.model.QueryResult) Datum(com.amazonaws.services.timestreamquery.model.Datum) QueryRequest(com.amazonaws.services.timestreamquery.model.QueryRequest) GetTableResponse(com.amazonaws.athena.connector.lambda.metadata.GetTableResponse) Schema(org.apache.arrow.vector.types.pojo.Schema) SchemaBuilder(com.amazonaws.athena.connector.lambda.data.SchemaBuilder) Row(com.amazonaws.services.timestreamquery.model.Row)

Aggregations

SchemaBuilder (com.amazonaws.athena.connector.lambda.data.SchemaBuilder)68 Schema (org.apache.arrow.vector.types.pojo.Schema)48 TableName (com.amazonaws.athena.connector.lambda.domain.TableName)43 Test (org.junit.Test)43 PreparedStatement (java.sql.PreparedStatement)37 ResultSet (java.sql.ResultSet)35 Constraints (com.amazonaws.athena.connector.lambda.domain.predicate.Constraints)30 BlockAllocatorImpl (com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl)23 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)23 BlockAllocator (com.amazonaws.athena.connector.lambda.data.BlockAllocator)20 Split (com.amazonaws.athena.connector.lambda.domain.Split)17 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)17 ArrayList (java.util.ArrayList)15 ValueSet (com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet)12 GetTableLayoutResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutResponse)12 GetTableResponse (com.amazonaws.athena.connector.lambda.metadata.GetTableResponse)12 GetTableLayoutRequest (com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest)11 Connection (java.sql.Connection)10 HashMap (java.util.HashMap)10 ImmutableMap (com.google.common.collect.ImmutableMap)8