Use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From the class HiveMetadataHandler, method getSchema:
/**
 * Builds the table schema, converting Hive data types to Apache Arrow data types.
 *
 * The JDBC-reported type is used as a baseline and then refined with the native Hive
 * type name (fetched via {@code GET_METADATA_QUERY}), because several Hive types
 * (DATE, BINARY, DOUBLE, BOOLEAN, FLOAT, TIMESTAMP) do not map cleanly through the
 * generic JDBC type codes.
 *
 * @param jdbcConnection A JDBC Hive database connection
 * @param tableName Holds table name and schema name. See {@link TableName}
 * @param partitionSchema A partition schema for a given table. See {@link Schema}
 * @return Schema holding the table schema along with the partition schema. See {@link Schema}
 * @throws SQLException Thrown for database connection failures, query syntax errors and so on.
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
            Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        // NOTE(review): the table name is concatenated into the query text because JDBC
        // cannot parameterize identifiers; the value should be validated upstream.
        try (PreparedStatement psmt = connection.prepareStatement(GET_METADATA_QUERY + tableName.getTableName().toUpperCase())) {
            // Native Hive type name keyed by column name.
            Map<String, String> metadataMap = getMetadataForGivenTable(psmt);
            while (resultSet.next()) {
                ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
                String columnName = resultSet.getString(HiveConstants.COLUMN_NAME);
                String dataType = metadataMap.get(columnName);
                LOGGER.debug("columnName:" + columnName);
                LOGGER.debug("dataType:" + dataType);
                if (dataType != null) {
                    // Normalize once so every comparison below is case-insensitive. The
                    // original FLOAT check compared against the raw string, so a lowercase
                    // "float" column name silently missed the FLOAT4 conversion.
                    String upperDataType = dataType.toUpperCase();
                    // Hive DATE -> Arrow DATEDAY
                    if (upperDataType.contains("DATE")) {
                        columnType = Types.MinorType.DATEDAY.getType();
                    }
                    // Hive BINARY -> Arrow VARBINARY
                    if (upperDataType.contains("BINARY")) {
                        columnType = Types.MinorType.VARBINARY.getType();
                    }
                    // Hive DOUBLE -> Arrow FLOAT8
                    if (upperDataType.contains("DOUBLE")) {
                        columnType = Types.MinorType.FLOAT8.getType();
                    }
                    // Hive BOOLEAN -> Arrow BIT
                    if (upperDataType.contains("BOOLEAN")) {
                        columnType = Types.MinorType.BIT.getType();
                    }
                    // Hive FLOAT -> Arrow FLOAT4
                    if (upperDataType.contains("FLOAT")) {
                        columnType = Types.MinorType.FLOAT4.getType();
                    }
                    // Hive TIMESTAMP -> Arrow DATEMILLI
                    if (upperDataType.contains("TIMESTAMP")) {
                        columnType = Types.MinorType.DATEMILLI.getType();
                    }
                }
                // Anything unmapped or unsupported falls back to VARCHAR.
                if (columnType == null || !SupportedTypes.isSupported(columnType)) {
                    columnType = Types.MinorType.VARCHAR.getType();
                }
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
            }
        }
        // Append partition columns so they appear in the resulting table schema as well.
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        return schemaBuilder.build();
    }
}
Use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From the class JdbcMetadataHandler, method getSchema:
/**
 * Builds the Arrow schema for the given table by walking the JDBC column metadata.
 * Columns whose JDBC type cannot be mapped to a supported Arrow type default to
 * VARCHAR; partition columns are appended at the end.
 *
 * @param jdbcConnection JDBC connection used to read column metadata
 * @param tableName schema-qualified table to describe
 * @param partitionSchema partition columns to append to the resulting schema
 * @return the assembled Arrow schema
 * @throws SQLException on metadata lookup failure
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) {
        boolean sawAnyColumn = false;
        while (resultSet.next()) {
            ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
            String columnName = resultSet.getString("COLUMN_NAME");
            boolean supported = columnType != null && SupportedTypes.isSupported(columnType);
            if (!supported) {
                // Default to VARCHAR ArrowType
                LOGGER.warn("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType + " - defaulting type to VARCHAR.");
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, new ArrowType.Utf8()).build());
            }
            else if (columnType instanceof ArrowType.List) {
                // List columns need their element type resolved from the database type name.
                schemaBuilder.addListField(columnName, getArrayArrowTypeFromTypeName(resultSet.getString("TYPE_NAME"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS")));
            }
            else {
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
            }
            sawAnyColumn = true;
        }
        if (!sawAnyColumn) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        // add partition columns
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        return schemaBuilder.build();
    }
}
Use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From the class OracleMetadataHandler, method getSchema:
/**
 * Builds the Arrow schema for an Oracle table, converting Oracle data types to
 * Apache Arrow data types.
 *
 * The JDBC-reported type is refined with the native Oracle type name read from
 * USER_TAB_COLS, since Oracle NUMBER/DATE/TIMESTAMP columns do not map cleanly
 * through the generic JDBC type codes.
 *
 * @param jdbcConnection JDBC Oracle database connection
 * @param tableName holds the table name and schema name. See {@link TableName}
 * @param partitionSchema partition schema for the table. See {@link Schema}
 * @return Schema holding the table schema along with the partition schema
 * @throws SQLException on database connection failures, query syntax errors and so on
 */
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) throws SQLException {
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData());
            Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) {
        boolean found = false;
        // Native Oracle data type keyed by column name, used for the conversions below.
        Map<String, String> dataTypeByColumn = new HashMap<>();
        try (PreparedStatement stmt = connection.prepareStatement("select COLUMN_NAME ,DATA_TYPE from USER_TAB_COLS where table_name =?")) {
            stmt.setString(1, tableName.getTableName().toUpperCase());
            // Close the data-type result set as soon as the map is populated
            // (the original left it open until the statement closed).
            try (ResultSet dataTypeResultSet = stmt.executeQuery()) {
                while (dataTypeResultSet.next()) {
                    dataTypeByColumn.put(dataTypeResultSet.getString(COLUMN_NAME).trim(), dataTypeResultSet.getString("DATA_TYPE").trim());
                }
            }
        }
        while (resultSet.next()) {
            ArrowType columnType = JdbcArrowTypeConverter.toArrowType(resultSet.getInt("DATA_TYPE"), resultSet.getInt("COLUMN_SIZE"), resultSet.getInt("DECIMAL_DIGITS"));
            String columnName = resultSet.getString(COLUMN_NAME);
            if (columnType instanceof ArrowType.Decimal) {
                ArrowType.Decimal decimalType = (ArrowType.Decimal) columnType;
                // Zero precision or scale (e.g. plain NUMBER, or TIMESTAMP/DATE surfaced
                // as a decimal) maps to BIGINT. The original test matched the digit '0'
                // anywhere in the type's toString(), which wrongly converted types such
                // as Decimal(10, 2); compare the actual precision and scale instead.
                if (decimalType.getPrecision() == 0 || decimalType.getScale() == 0) {
                    columnType = Types.MinorType.BIGINT.getType();
                }
                // Oracle permits negative scale (e.g. NUMBER(5, -2)); Arrow does not,
                // so fall back to VARCHAR.
                if (decimalType.getScale() < 0) {
                    columnType = Types.MinorType.VARCHAR.getType();
                }
            }
            String dataType = dataTypeByColumn.get(columnName);
            LOGGER.debug("columnName: " + columnName);
            LOGGER.debug("dataType: " + dataType);
            if (dataType != null) {
                // Normalize once so all checks below are case-insensitive.
                String upperDataType = dataType.toUpperCase();
                // Oracle DATE -> Arrow DATEDAY
                if (upperDataType.contains("DATE")) {
                    columnType = Types.MinorType.DATEDAY.getType();
                }
                // Oracle NUMBER surfaced as a string type -> Arrow BIGINT. Guard against
                // a null columnType: the original dereferenced it unconditionally here
                // and could throw a NullPointerException.
                if (upperDataType.contains("NUMBER") && columnType != null && ArrowType.ArrowTypeID.Utf8.equals(columnType.getTypeID())) {
                    columnType = Types.MinorType.BIGINT.getType();
                }
                // Oracle TIMESTAMP -> Arrow DATEMILLI
                if (upperDataType.contains("TIMESTAMP")) {
                    columnType = Types.MinorType.DATEMILLI.getType();
                }
            }
            // Anything unmapped or unsupported falls back to VARCHAR, so every column is
            // added; the original's "unsupported" error branch was unreachable after the
            // VARCHAR fallback.
            if (columnType == null || !SupportedTypes.isSupported(columnType)) {
                columnType = Types.MinorType.VARCHAR.getType();
            }
            schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
            found = true;
        }
        if (!found) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }
        partitionSchema.getFields().forEach(schemaBuilder::addField);
        LOGGER.debug("Oracle Table Schema" + schemaBuilder.toString());
        return schemaBuilder.build();
    }
}
Use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From the class PostGreSqlRecordHandlerTest, method buildSplitSqlTest:
@Test
public void buildSplitSqlTest() throws SQLException {
// Verifies that buildSplitSql generates the expected parameterized SQL for a split
// covering all supported column types, and binds each constraint value with the
// JDBC setter matching the column's type, at the correct parameter index.
logger.info("buildSplitSqlTest - enter");
TableName tableName = new TableName("testSchema", "testTable");
// Schema with one column per supported Arrow minor type, plus the two partition
// pseudo-columns the PostGreSql handler expects on every split.
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.VARCHAR.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.BIGINT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.FLOAT4.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol5", Types.MinorType.SMALLINT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol6", Types.MinorType.TINYINT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol7", Types.MinorType.FLOAT8.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol8", Types.MinorType.BIT.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("testCol9", new ArrowType.Decimal(8, 2)).build());
schemaBuilder.addField(FieldBuilder.newBuilder("partition_schema_name", Types.MinorType.VARCHAR.getType()).build());
schemaBuilder.addField(FieldBuilder.newBuilder("partition_name", Types.MinorType.VARCHAR.getType()).build());
Schema schema = schemaBuilder.build();
// Split pinned to partition s0/p0; both the property map and the individual
// property lookups must agree for buildSplitSql to resolve the FROM clause.
Split split = Mockito.mock(Split.class);
Mockito.when(split.getProperties()).thenReturn(ImmutableMap.of("partition_schema_name", "s0", "partition_name", "p0"));
Mockito.when(split.getProperty(Mockito.eq(com.amazonaws.athena.connectors.postgresql.PostGreSqlMetadataHandler.BLOCK_PARTITION_SCHEMA_COLUMN_NAME))).thenReturn("s0");
Mockito.when(split.getProperty(Mockito.eq(com.amazonaws.athena.connectors.postgresql.PostGreSqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME))).thenReturn("p0");
// testCol1: two single-value ranges, expected to render as an IN (?,?) predicate.
Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
Mockito.when(range1a.isSingleValue()).thenReturn(true);
Mockito.when(range1a.getLow().getValue()).thenReturn(1);
Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
Mockito.when(range1b.isSingleValue()).thenReturn(true);
Mockito.when(range1b.getLow().getValue()).thenReturn(2);
ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
// testCol2: [1, 10) string range; testCol3: (2, 20] long range.
ValueSet valueSet2 = getRangeSet(Marker.Bound.EXACTLY, "1", Marker.Bound.BELOW, "10");
ValueSet valueSet3 = getRangeSet(Marker.Bound.ABOVE, 2L, Marker.Bound.EXACTLY, 20L);
// Remaining columns: single-value constraints, one per primitive type.
ValueSet valueSet4 = getSingleValueSet(1.1F);
ValueSet valueSet5 = getSingleValueSet(1);
ValueSet valueSet6 = getSingleValueSet(0);
ValueSet valueSet7 = getSingleValueSet(1.2d);
ValueSet valueSet8 = getSingleValueSet(true);
ValueSet valueSet9 = getSingleValueSet(BigDecimal.valueOf(12.34));
Constraints constraints = Mockito.mock(Constraints.class);
Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol1", valueSet1).put("testCol2", valueSet2).put("testCol3", valueSet3).put("testCol4", valueSet4).put("testCol5", valueSet5).put("testCol6", valueSet6).put("testCol7", valueSet7).put("testCol8", valueSet8).put("testCol9", valueSet9).build());
// Partition pseudo-columns are excluded from the SELECT list; predicates appear
// in the order the constraint map was built.
String expectedSql = "SELECT \"testCol1\", \"testCol2\", \"testCol3\", \"testCol4\", \"testCol5\", \"testCol6\", \"testCol7\", \"testCol8\", \"testCol9\" FROM \"s0\".\"p0\" WHERE (\"testCol1\" IN (?,?)) AND ((\"testCol2\" >= ? AND \"testCol2\" < ?)) AND ((\"testCol3\" > ? AND \"testCol3\" <= ?)) AND (\"testCol4\" = ?) AND (\"testCol5\" = ?) AND (\"testCol6\" = ?) AND (\"testCol7\" = ?) AND (\"testCol8\" = ?) AND (\"testCol9\" = ?)";
PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
PreparedStatement preparedStatement = this.postGreSqlRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
Assert.assertEquals(expectedPreparedStatement, preparedStatement);
// Each parameter must be bound with the setter for its column type, in
// left-to-right predicate order.
Mockito.verify(preparedStatement, Mockito.times(1)).setInt(1, 1);
Mockito.verify(preparedStatement, Mockito.times(1)).setInt(2, 2);
Mockito.verify(preparedStatement, Mockito.times(1)).setString(3, "1");
Mockito.verify(preparedStatement, Mockito.times(1)).setString(4, "10");
Mockito.verify(preparedStatement, Mockito.times(1)).setLong(5, 2L);
Mockito.verify(preparedStatement, Mockito.times(1)).setLong(6, 20L);
Mockito.verify(preparedStatement, Mockito.times(1)).setFloat(7, 1.1F);
Mockito.verify(preparedStatement, Mockito.times(1)).setShort(8, (short) 1);
Mockito.verify(preparedStatement, Mockito.times(1)).setByte(9, (byte) 0);
Mockito.verify(preparedStatement, Mockito.times(1)).setDouble(10, 1.2d);
Mockito.verify(preparedStatement, Mockito.times(1)).setBoolean(11, true);
Mockito.verify(preparedStatement, Mockito.times(1)).setBigDecimal(12, BigDecimal.valueOf(12.34));
logger.info("buildSplitSqlTest - exit");
}
Use of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From the class PostGreSqlRecordHandlerTest, method buildSplitSqlForDateTest:
/**
 * Verifies that buildSplitSql renders a DATEDAY constraint as a SQL predicate and
 * binds the value as a {@code java.sql.Date} at the expected parameter index.
 */
@Test
public void buildSplitSqlForDateTest() throws SQLException {
    logger.info("buildSplitSqlForDateTest - enter");
    TableName tableName = new TableName("testSchema", "testTable");
    // Schema with a single DATEDAY column plus the partition pseudo-columns.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testDate", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition_schema_name", Types.MinorType.VARCHAR.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition_name", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    // Split pinned to partition s0/p0; both the property map and the individual
    // property lookups must agree for buildSplitSql to resolve the FROM clause.
    // (Fully-qualified class reference used consistently here; the original mixed
    // the qualified and simple names for the same class.)
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(ImmutableMap.of("partition_schema_name", "s0", "partition_name", "p0"));
    Mockito.when(split.getProperty(Mockito.eq(com.amazonaws.athena.connectors.postgresql.PostGreSqlMetadataHandler.BLOCK_PARTITION_SCHEMA_COLUMN_NAME))).thenReturn("s0");
    Mockito.when(split.getProperty(Mockito.eq(com.amazonaws.athena.connectors.postgresql.PostGreSqlMetadataHandler.BLOCK_PARTITION_COLUMN_NAME))).thenReturn("p0");
    // DATEDAY values travel as a day offset from the epoch, not as milliseconds.
    final long dateDays = TimeUnit.MILLISECONDS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(Collections.singletonMap("testDate", valueSet));
    String expectedSql = "SELECT \"testDate\" FROM \"s0\".\"p0\" WHERE (\"testDate\" = ?)";
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.eq(expectedSql))).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.postGreSqlRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
    // The day offset must be converted back to millis when bound as java.sql.Date.
    Mockito.verify(preparedStatement, Mockito.times(1)).setDate(1, new Date(TimeUnit.DAYS.toMillis(dateDays)));
    logger.info("buildSplitSqlForDateTest - exit");
}
Aggregations