use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.
the class SqlServerRecordHandlerTest method buildSplitSqlNew.
@Test
public void buildSplitSqlNew() throws SQLException {
TableName tableName = new TableName("testSchema", "testTable");
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARCHAR.getType()).build());
Schema schema = schemaBuilder.build();
Split split = Mockito.mock(Split.class);
Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_FUNCTION)).thenReturn("pf");
Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITIONING_COLUMN)).thenReturn("testCol1");
Mockito.when(split.getProperty(SqlServerMetadataHandler.PARTITION_NUMBER)).thenReturn("1");
ValueSet valueSet = getSingleValueSet("varcharTest");
Constraints constraints = Mockito.mock(Constraints.class);
Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol4", valueSet).build());
String expectedSql = "SELECT `testCol1`, `testCol2`, `testCol3`, `testCol4` FROM `testSchema`.`testTable` WHERE (`testCol4` = ?) AND $PARTITION.pf(testCol1) = 1";
PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
PreparedStatement preparedStatement = this.sqlServerRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Assert.assertEquals(expectedPreparedStatement, preparedStatement);
Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, "varcharTest");
}
use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.
the class SqlServerMetadataHandler method getSchema.
/**
* Appropriate datatype to arrow type conversions will be done by fetching data types of columns
* @param jdbcConnection
* @param tableName
* @param partitionSchema
* @return
* @throws SQLException
*/
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
String dataTypeQuery = "SELECT C.NAME AS COLUMN_NAME, TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE " + "FROM SYS.COLUMNS C " + "JOIN SYS.TYPES T " + "ON C.USER_TYPE_ID=T.USER_TYPE_ID " + "WHERE C.OBJECT_ID=OBJECT_ID(?)";
String dataType;
String columnName;
HashMap<String, String> hashMap = new HashMap<>();
boolean found = false;
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
PreparedStatement stmt = connection.prepareStatement(dataTypeQuery)) {
// fetch data types of columns and prepare map with column name and datatype.
stmt.setString(1, tableName.getSchemaName() + "." + tableName.getTableName());
try (ResultSet dataTypeResultSet = stmt.executeQuery()) {
while (dataTypeResultSet.next()) {
dataType = dataTypeResultSet.getString("DATA_TYPE");
columnName = dataTypeResultSet.getString("COLUMN_NAME");
hashMap.put(columnName.trim(), dataType.trim());
}
}
while (resultSet.next()) {
ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
columnName = resultSet.getString("COLUMN_NAME");
dataType = hashMap.get(columnName);
LOGGER.debug("columnName: " + columnName);
LOGGER.debug("dataType: " + dataType);
/**
* Converting date data type into DATEDAY since framework is unable to do it by default
*/
if ("date".equalsIgnoreCase(dataType)) {
columnType = Types.MinorType.DATEDAY.getType();
}
/**
* Converting bit data type into TINYINT because BIT type is showing 0 as false and 1 as true.
* we can avoid it by changing to TINYINT.
*/
if ("bit".equalsIgnoreCase(dataType)) {
columnType = Types.MinorType.TINYINT.getType();
}
/**
* Converting tinyint data type into SMALLINT.
* TINYINT range is 0 to 255 in SQL Server, usage of TINYINT(ArrowType) leads to data loss
* as its using 1 bit as signed flag.
*/
if ("tinyint".equalsIgnoreCase(dataType)) {
columnType = Types.MinorType.SMALLINT.getType();
}
/**
* Converting numeric, smallmoney data types into FLOAT8 to avoid data loss
* (ex: 123.45 is shown as 123 (loosing its scale))
*/
if ("numeric".equalsIgnoreCase(dataType) || "smallmoney".equalsIgnoreCase(dataType)) {
columnType = Types.MinorType.FLOAT8.getType();
}
/**
* Converting time data type(s) into DATEMILLI since framework is unable to map it by default
*/
if ("datetime".equalsIgnoreCase(dataType) || "datetime2".equalsIgnoreCase(dataType) || "smalldatetime".equalsIgnoreCase(dataType) || "datetimeoffset".equalsIgnoreCase(dataType)) {
columnType = Types.MinorType.DATEMILLI.getType();
}
/**
* converting into VARCHAR for non supported data types.
*/
if (columnType == null) {
columnType = Types.MinorType.VARCHAR.getType();
}
if (columnType != null && !SupportedTypes.isSupported(columnType)) {
columnType = Types.MinorType.VARCHAR.getType();
}
LOGGER.debug("columnType: " + columnType);
if (columnType != null && SupportedTypes.isSupported(columnType)) {
schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
found = true;
} else {
LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
}
}
if (!found) {
throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
}
// add partition columns
partitionSchema.getFields().forEach(schemaBuilder::addField);
return schemaBuilder.build();
}
}
use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.
the class SqlServerMetadataHandlerTest method doGetTableLayout.
@Test
public void doGetTableLayout() throws Exception {
BlockAllocator blockAllocator = new BlockAllocatorImpl();
Constraints constraints = Mockito.mock(Constraints.class);
TableName tableName = new TableName("testSchema", "testTable");
Schema partitionSchema = this.sqlServerMetadataHandler.getPartitionSchema("testCatalogName");
Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);
PreparedStatement rowCountPreparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.ROW_COUNT_QUERY)).thenReturn(rowCountPreparedStatement);
ResultSet rowCountResultSet = mockResultSet(new String[] { "ROW_COUNT" }, new int[] { Types.INTEGER }, new Object[][] { { 2 } }, new AtomicInteger(-1));
Mockito.when(rowCountPreparedStatement.executeQuery()).thenReturn(rowCountResultSet);
PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(sqlServerMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);
String[] columns = { sqlServerMetadataHandler.PARTITION_NUMBER };
int[] types = { Types.VARCHAR };
Object[][] values = { { "2" }, { "3" } };
ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);
Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);
GetTableLayoutResponse getTableLayoutResponse = this.sqlServerMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);
List<String> actualValues = new ArrayList<>();
for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) {
actualValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i));
}
Assert.assertEquals(Arrays.asList("[PARTITION_NUMBER : 1]", "[PARTITION_NUMBER : 2]", "[PARTITION_NUMBER : 3]"), actualValues);
SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder();
expectedSchemaBuilder.addField(FieldBuilder.newBuilder(sqlServerMetadataHandler.PARTITION_NUMBER, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build());
Schema expectedSchema = expectedSchemaBuilder.build();
Assert.assertEquals(expectedSchema, getTableLayoutResponse.getPartitions().getSchema());
Assert.assertEquals(tableName, getTableLayoutResponse.getTableName());
Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName());
}
use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.
the class SynapseRecordHandlerTest method buildSplitSql.
@Test
public void buildSplitSql() throws SQLException {
TableName tableName = new TableName("testSchema", "testTable");
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARCHAR.getType()).build());
Schema schema = schemaBuilder.build();
Split split = Mockito.mock(Split.class);
Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_COLUMN)).thenReturn("id");
Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_BOUNDARY_FROM)).thenReturn("100000");
Mockito.when(split.getProperty(SynapseMetadataHandler.PARTITION_BOUNDARY_TO)).thenReturn("300000");
ValueSet valueSet = getSingleValueSet("varcharTest");
Constraints constraints = Mockito.mock(Constraints.class);
Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol4", valueSet).build());
String expectedSql = "SELECT `testCol1`, `testCol2`, `testCol3`, `testCol4` FROM `testSchema`.`testTable` WHERE (`testCol4` = ?) AND id > 100000 and id <= 300000";
PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
PreparedStatement preparedStatement = this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Assert.assertEquals(expectedPreparedStatement, preparedStatement);
Mockito.verify(preparedStatement, Mockito.times(1)).setString(1, "varcharTest");
}
use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in project aws-athena-query-federation by awslabs.
the class SynapseRecordHandlerTest method buildSplitSqlWithPartition.
@Test
public void buildSplitSqlWithPartition() throws SQLException {
TableName tableName = new TableName("testSchema", "testTable");
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
Schema schema = schemaBuilder.build();
Split split = Mockito.mock(Split.class);
Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", "0", "PARTITION_NUMBER", "1", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", "100000"));
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn("0");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn("100000");
Constraints constraints = Mockito.mock(Constraints.class);
PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", " ", "PARTITION_NUMBER", "1", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", "100000"));
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn(" ");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn("100000");
this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", "300000", "PARTITION_NUMBER", "2", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", " "));
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn("300000");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn(" ");
this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Mockito.when(split.getProperties()).thenReturn(Map.of("PARTITION_BOUNDARY_FROM", " ", "PARTITION_NUMBER", "2", "PARTITION_COLUMN", "testCol1", "PARTITION_BOUNDARY_TO", " "));
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_FROM"))).thenReturn(" ");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_NUMBER"))).thenReturn("1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_COLUMN"))).thenReturn("testCol1");
Mockito.when(split.getProperty(Mockito.eq("PARTITION_BOUNDARY_TO"))).thenReturn(" ");
this.synapseRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
}
Aggregations