Usage of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From class GlueMetadataHandlerTest, method populateSourceTableFromLocation:
@Test
public void populateSourceTableFromLocation() {
    Map<String, String> tableParameters = new HashMap<>();
    // The source-table name must be extracted correctly regardless of which
    // AWS partition the location ARN belongs to.
    for (String awsPartition : Arrays.asList("aws", "aws-cn", "aws-us-gov")) {
        String location = String.format("arn:%s:dynamodb:us-east-1:012345678910:table/My-Table", awsPartition);
        Table table = new Table()
                .withParameters(tableParameters)
                .withStorageDescriptor(new StorageDescriptor().withLocation(location));
        SchemaBuilder schemaBuilder = new SchemaBuilder();
        populateSourceTableNameIfAvailable(table, schemaBuilder);
        // The table name is the final ARN segment, after "table/".
        assertEquals("My-Table", getSourceTableName(schemaBuilder.build()));
    }
}
Usage of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From class SchemaSerializationTest, method serializationTest:
@Test
public void serializationTest() throws IOException {
    logger.info("serializationTest - enter");
    // Build a schema carrying both metadata and a mix of field types.
    SchemaBuilder builder = new SchemaBuilder();
    builder.addMetadata("meta1", "meta-value-1");
    builder.addMetadata("meta2", "meta-value-2");
    builder.addField("intfield1", new ArrowType.Int(32, true));
    builder.addField("doublefield2", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
    builder.addField("varcharfield3", new ArrowType.Utf8());
    Schema expectedSchema = builder.build();

    // Sanity-check that the schema itself serializes without throwing.
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    schemaSerDe.serialize(expectedSchema, schemaOut);

    // Round-trip a POJO that embeds the schema through Jackson.
    TestPojo expected = new TestPojo(expectedSchema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    objectMapper.writeValue(out, expected);
    TestPojo actual = objectMapper.readValue(new ByteArrayInputStream(out.toByteArray()), TestPojo.class);
    Schema actualSchema = actual.getSchema();

    logger.info("serializationTest - fields[{}]", actualSchema.getFields());
    logger.info("serializationTest - meta[{}]", actualSchema.getCustomMetadata());
    // Fields and custom metadata must both survive the round trip.
    assertEquals(expectedSchema.getFields(), actualSchema.getFields());
    assertEquals(expectedSchema.getCustomMetadata(), actualSchema.getCustomMetadata());
    logger.info("serializationTest - exit");
}
Usage of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From class BigQueryMetadataHandler, method getSchema:
/**
 * Fetches the BigQuery table definition and translates it into an Arrow schema.
 *
 * @param projectName the GCP project that owns the dataset
 * @param datasetName the dataset containing the table; case is normalized via fixCaseForDatasetName
 * @param tableName the table to describe; case is normalized via fixCaseForTableName
 * @return the Arrow schema, with a "timeStampCols" metadata entry listing TIMESTAMP-typed columns
 * @throws IllegalArgumentException if the table does not exist
 */
private Schema getSchema(String projectName, String datasetName, String tableName) {
    // Resolve the exact (case-sensitive) names BigQuery stores before looking up the table.
    datasetName = fixCaseForDatasetName(projectName, datasetName, bigQuery);
    tableName = fixCaseForTableName(projectName, datasetName, tableName, bigQuery);
    TableId tableId = TableId.of(projectName, datasetName, tableName);
    Table response = bigQuery.getTable(tableId);
    if (response == null) {
        // BigQuery's client returns null (rather than throwing) when the table is absent;
        // the original code would have thrown an uninformative NPE on the next line.
        throw new IllegalArgumentException(
                String.format("Table %s.%s not found in project %s", datasetName, tableName, projectName));
    }
    TableDefinition tableDefinition = response.getDefinition();
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    List<String> timeStampColsList = new ArrayList<>();
    for (Field field : tableDefinition.getSchema().getFields()) {
        // Record TIMESTAMP columns in schema metadata — presumably consumed downstream
        // for special timestamp handling; TODO confirm against the record handler.
        if ("TIMESTAMP".equals(field.getType().getStandardType().toString())) {
            timeStampColsList.add(field.getName());
        }
        schemaBuilder.addField(field.getName(), translateToArrowType(field.getType()));
    }
    schemaBuilder.addMetadata("timeStampCols", timeStampColsList.toString());
    logger.debug("BigQuery table schema {}", schemaBuilder);
    return schemaBuilder.build();
}
Usage of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From class HbaseMetadataHandler, method doGetTable:
/**
 * If Glue is enabled as a source of supplemental metadata we look up the requested Schema/Table in Glue and
 * filters out any results that don't have the HBASE_METADATA_FLAG set. If no matching results were found in Glue,
 * then we resort to inferring the schema of the HBase table using HbaseSchemaUtils.inferSchema(...). If there
 * is no such table in HBase the operation will fail.
 *
 * @param blockAllocator allocator used for any Arrow resources created while handling the request
 * @param request identifies the catalog/schema/table being described
 * @return the table's schema, augmented with the synthetic HBase row-key column
 * @see GlueMetadataHandler
 */
@Override
public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest request) throws Exception {
    // BUG FIX: the original message had no {} placeholder, so SLF4J silently dropped
    // the table-name argument from the log line.
    logger.info("doGetTable: enter {}", request.getTableName());
    Schema origSchema = null;
    try {
        if (awsGlue != null) {
            origSchema = super.doGetTable(blockAllocator, request, TABLE_FILTER).getSchema();
        }
    } catch (RuntimeException ex) {
        // Glue is best-effort supplemental metadata; fall through to schema inference below.
        logger.warn("doGetTable: Unable to retrieve table[{}:{}] from AWSGlue.", request.getTableName().getSchemaName(), request.getTableName().getTableName(), ex);
    }
    if (origSchema == null) {
        origSchema = HbaseSchemaUtils.inferSchema(getOrCreateConn(request), request.getTableName(), NUM_ROWS_TO_SCAN);
    }
    // Rebuild the schema so we can append the row-key column without mutating origSchema.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    origSchema.getFields().forEach((Field field) -> schemaBuilder.addField(field.getName(), field.getType(), field.getChildren()));
    origSchema.getCustomMetadata().entrySet().forEach((Map.Entry<String, String> meta) -> schemaBuilder.addMetadata(meta.getKey(), meta.getValue()));
    // Every HBase table exposes its row key as an extra VARCHAR column.
    schemaBuilder.addField(HbaseSchemaUtils.ROW_COLUMN_NAME, Types.MinorType.VARCHAR.getType());
    Schema schema = schemaBuilder.build();
    logger.info("doGetTable: return {}", schema);
    return new GetTableResponse(request.getCatalogName(), request.getTableName(), schema);
}
Usage of com.amazonaws.athena.connector.lambda.data.SchemaBuilder in the project aws-athena-query-federation by awslabs.
From class HiveRecordHandlerTest, method buildSplitSql:
@Test
public void buildSplitSql() throws SQLException {
    TableName tableName = new TableName("testSchema", "testTable");
    // Schema covering the type families the SQL builder must quote/convert correctly.
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol1", Types.MinorType.INT.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol2", Types.MinorType.DATEDAY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol3", Types.MinorType.DATEMILLI.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("testCol4", Types.MinorType.VARBINARY.getType()).build());
    schemaBuilder.addField(FieldBuilder.newBuilder("partition", Types.MinorType.VARCHAR.getType()).build());
    Schema schema = schemaBuilder.build();
    Split split = Mockito.mock(Split.class);
    Mockito.when(split.getProperties()).thenReturn(Collections.singletonMap("partition", "p0"));
    Mockito.when(split.getProperty(Mockito.eq("partition"))).thenReturn("p0");
    Range range1a = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1a.isSingleValue()).thenReturn(true);
    Mockito.when(range1a.getLow().getValue()).thenReturn(1);
    Range range1b = Mockito.mock(Range.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(range1b.isSingleValue()).thenReturn(true);
    Mockito.when(range1b.getLow().getValue()).thenReturn(2);
    ValueSet valueSet1 = Mockito.mock(SortedRangeSet.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(valueSet1.getRanges().getOrderedRanges()).thenReturn(ImmutableList.of(range1a, range1b));
    // BUG FIX: TimeUnit.DAYS.toDays(x) is an identity conversion, so the original code
    // stored epoch *milliseconds* where a DATEDAY (days-since-epoch) count was intended.
    final long dateDays = TimeUnit.MILLISECONDS.toDays(Date.valueOf("2020-01-05").getTime());
    ValueSet valueSet2 = getSingleValueSet(dateDays);
    Constraints constraints = Mockito.mock(Constraints.class);
    Mockito.when(constraints.getSummary()).thenReturn(new ImmutableMap.Builder<String, ValueSet>().put("testCol2", valueSet2).build());
    PreparedStatement expectedPreparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(Mockito.anyString())).thenReturn(expectedPreparedStatement);
    PreparedStatement preparedStatement = this.hiveRecordHandler.buildSplitSql(this.connection, "testCatalogName", tableName, schema, constraints, split);
    // The handler must hand back the statement prepared on the supplied connection.
    Assert.assertEquals(expectedPreparedStatement, preparedStatement);
}
Aggregations