Search in sources :

Example 1 with QueryResultColumn

use of com.thinkbiganalytics.discovery.schema.QueryResultColumn in project kylo by Teradata.

the class SparkFileSchemaParserServiceTest method newColumn.

/**
 * Creates a query result column for use in tests.
 *
 * <p>The field and the display name are both set to {@code name}. The table name is always {@code "table"} and the database name is always
 * {@code "database"}.</p>
 *
 * @param name     the field and display name of the column
 * @param dataType the data type of the column
 * @return the new column
 */
private QueryResultColumn newColumn(String name, String dataType) {
    final QueryResultColumn created = new DefaultQueryResultColumn();

    // Identity of the column
    created.setField(name);
    created.setDisplayName(name);

    // Fixed location and the requested type
    created.setTableName("table");
    created.setDataType(dataType);
    created.setDatabaseName("database");

    return created;
}
Also used : DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)

Example 2 with QueryResultColumn

use of com.thinkbiganalytics.discovery.schema.QueryResultColumn in project kylo by Teradata.

the class SqlTransformStage method extractSchema.

/**
 * Derives the Spark SQL schema for a JDBC result set and stores the matching column descriptions on the transform result.
 *
 * <p>The display name of a column is its label with everything up to the last {@code '.'} removed. When a display name repeats, the number of earlier
 * occurrences is appended to it (the first occurrence is left unchanged).</p>
 *
 * @param rsmd   the result set metadata to read the columns from
 * @param result the transform result that receives the column descriptions
 * @return a struct type with one field per result set column, each named by its column label
 * @throws SQLException if the metadata cannot be read
 */
@Nonnull
private StructType extractSchema(@Nonnull final ResultSetMetaData rsmd, @Nonnull final TransformResult result) throws SQLException {
    final int total = rsmd.getColumnCount();
    final List<QueryResultColumn> descriptors = new ArrayList<>(total);
    final Map<String, Integer> occurrences = new HashMap<>();
    final StructField[] structFields = new StructField[total];

    // JDBC numbers its columns from 1; the struct field array is 0-based
    for (int jdbcIndex = 1; jdbcIndex <= total; ++jdbcIndex) {
        // Read everything needed from the metadata up front
        final String label = rsmd.getColumnLabel(jdbcIndex);
        final String name = rsmd.getColumnName(jdbcIndex);
        final int sqlType = rsmd.getColumnType(jdbcIndex);
        final String sqlTypeName = rsmd.getColumnTypeName(jdbcIndex);
        final int precision = rsmd.getPrecision(jdbcIndex);
        final int scale = rsmd.getScale(jdbcIndex);
        final boolean nullable = rsmd.isNullable(jdbcIndex) != ResultSetMetaData.columnNoNulls;
        final boolean signed = rsmd.isSigned(jdbcIndex);

        final DefaultQueryResultColumn descriptor = new DefaultQueryResultColumn();
        descriptor.setField(name);
        descriptor.setHiveColumnLabel(label);

        // Drop any qualifier in front of the last '.' to get the base display name
        final String baseName;
        if (StringUtils.contains(label, ".")) {
            baseName = StringUtils.substringAfterLast(label, ".");
        } else {
            baseName = label;
        }

        // Repeated display names get the number of earlier occurrences as a suffix
        final Integer previous = occurrences.get(baseName);
        final int occurrence = (previous == null) ? 0 : previous + 1;
        occurrences.put(baseName, occurrence);
        descriptor.setDisplayName(baseName + (occurrence > 0 ? Integer.toString(occurrence) : ""));

        // NOTE(review): this stores the text after the last '.' of the column name as the table name - confirm that is intended
        descriptor.setTableName(StringUtils.substringAfterLast(name, "."));
        descriptor.setDataType(ParserHelper.sqlTypeToHiveType(sqlType));
        descriptor.setNativeDataType(sqlTypeName);

        // "precision,scale" when a scale is present, otherwise just the precision when there is one
        if (scale == 0) {
            if (precision != 0) {
                descriptor.setPrecisionScale(Integer.toString(precision));
            }
        } else {
            descriptor.setPrecisionScale(precision + "," + scale);
        }
        descriptors.add(descriptor);

        // Ask the dialect for the Spark type first and fall back to the generic mapping
        final MetadataBuilder metadata = new MetadataBuilder();
        final Option<DataType> dialectType = dialect.getCatalystType(sqlType, sqlTypeName, precision, metadata);
        final DataType catalystType = dialectType.isDefined() ? dialectType.get() : getCatalystType(sqlType, precision, scale, signed);
        structFields[jdbcIndex - 1] = new StructField(label, catalystType, nullable, metadata.build());
    }

    result.setColumns(descriptors);
    return new StructType(structFields);
}
Also used : MetadataBuilder(org.apache.spark.sql.types.MetadataBuilder) StructType(org.apache.spark.sql.types.StructType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) StructField(org.apache.spark.sql.types.StructField) DataType(org.apache.spark.sql.types.DataType) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn) Nonnull(javax.annotation.Nonnull)

Example 3 with QueryResultColumn

use of com.thinkbiganalytics.discovery.schema.QueryResultColumn in project kylo by Teradata.

the class QueryRunner method initQueryResult.

/**
 * Initializes the query result with the specified metadata.
 *
 * <p>One column description is created per result set column. The display name of a column is its label with everything up to the last {@code '.'}
 * removed; when a display name repeats, the number of earlier occurrences is appended to it.</p>
 *
 * @param queryResult the query result to initialize
 * @param rsMetaData  the result set metadata for the query
 * @throws SQLException if the metadata is not available
 */
private void initQueryResult(@Nonnull final DefaultQueryResult queryResult, @Nonnull final ResultSetMetaData rsMetaData) throws SQLException {
    // Read the column count once instead of asking the driver again on every loop iteration
    final int columnCount = rsMetaData.getColumnCount();
    final List<QueryResultColumn> columns = new ArrayList<>(Math.max(columnCount, 0));
    final Map<String, Integer> displayNameMap = new HashMap<>();

    // JDBC column indexes are 1-based
    for (int i = 1; i <= columnCount; i++) {
        // The column name is needed for both the field and the table name, so fetch it only once
        final String columnName = rsMetaData.getColumnName(i);
        final String columnLabel = rsMetaData.getColumnLabel(i);

        final DefaultQueryResultColumn column = new DefaultQueryResultColumn();
        column.setField(columnName);
        column.setHiveColumnLabel(columnLabel);

        // remove the table name if it exists
        final String displayName = StringUtils.contains(columnLabel, ".") ? StringUtils.substringAfterLast(columnLabel, ".") : columnLabel;

        // Repeated display names get the number of earlier occurrences as a suffix
        final Integer previous = displayNameMap.get(displayName);
        final int count = (previous == null) ? 0 : previous + 1;
        displayNameMap.put(displayName, count);
        column.setDisplayName(displayName + (count > 0 ? Integer.toString(count) : ""));

        // NOTE(review): this stores the text after the last '.' of the column name as the table name - confirm that is intended
        column.setTableName(StringUtils.substringAfterLast(columnName, "."));
        column.setDataType(ParserHelper.sqlTypeToHiveType(rsMetaData.getColumnType(i)));
        column.setNativeDataType(rsMetaData.getColumnTypeName(i));

        // "precision,scale" when a scale is present, otherwise just the precision when there is one
        final int precision = rsMetaData.getPrecision(i);
        final int scale = rsMetaData.getScale(i);
        if (scale != 0) {
            column.setPrecisionScale(precision + "," + scale);
        } else if (precision != 0) {
            column.setPrecisionScale(Integer.toString(precision));
        }
        columns.add(column);
    }
    queryResult.setColumns(columns);
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)

Example 4 with QueryResultColumn

use of com.thinkbiganalytics.discovery.schema.QueryResultColumn in project kylo by Teradata.

the class QueryRunner method query.

/**
 * Runs the given SELECT query and collects every returned row.
 *
 * <p>Each row is stored as an insertion-ordered map from column display name to the value read from the result set by the column's Hive column label.</p>
 *
 * @param query the SELECT query
 * @return the query result
 * @throws DataAccessException if the query is rejected by validation or cannot be executed
 */
public QueryResult query(String query) {
    // Refuse to run anything that does not pass validation
    if (!validateQuery(query)) {
        throw new DataRetrievalFailureException("Invalid query: " + query);
    }

    final DefaultQueryResult collected = new DefaultQueryResult(query);

    // Invoked once per row of the result set
    final RowCallbackHandler rowHandler = new RowCallbackHandler() {

        @Override
        public void processRow(ResultSet rs) throws SQLException {
            // Set up the columns while the result still reports itself as empty
            if (collected.isEmpty()) {
                initQueryResult(collected, rs.getMetaData());
            }

            // Copy the current row, keyed by display name
            final Map<String, Object> values = new LinkedHashMap<>();
            for (final QueryResultColumn column : collected.getColumns()) {
                final String key = column.getDisplayName();
                final Object value = rs.getObject(column.getHiveColumnLabel());
                values.put(key, value);
            }
            collected.addRow(values);
        }
    };

    jdbcTemplate.query(query, rowHandler);
    return collected;
}
Also used : DefaultQueryResult(com.thinkbiganalytics.discovery.model.DefaultQueryResult) SQLException(java.sql.SQLException) ResultSet(java.sql.ResultSet) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) RowCallbackHandler(org.springframework.jdbc.core.RowCallbackHandler) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 5 with QueryResultColumn

use of com.thinkbiganalytics.discovery.schema.QueryResultColumn in project kylo by Teradata.

the class SparkFileSchemaParserService method toHiveSchema.

/**
 * Converts a transform query result into a structured Hive schema.
 *
 * <p>One field is created per result column, named by the column's display name and typed by the column's data type. The non-null values found at the
 * column's position in each result row are added to the field as sample values, converted with {@code toString()}.</p>
 *
 * @param result   the query result providing the columns and rows
 * @param fileType the file type, appended to {@code "STORED AS "} to form the Hive format
 * @return the Hive schema
 */
private DefaultHiveSchema toHiveSchema(TransformQueryResult result, SparkFileType fileType) {
    DefaultHiveSchema schema = new DefaultHiveSchema();
    schema.setHiveFormat("STORED AS " + fileType);
    schema.setStructured(true);

    List<? extends QueryResultColumn> columns = result.getColumns();
    final int columnCount = columns.size();
    // The rows are the same for every column, so fetch them once instead of once per column
    List<List<Object>> rows = result.getRows();

    ArrayList<Field> fields = new ArrayList<>(columnCount);
    for (int i = 0; i < columnCount; ++i) {
        QueryResultColumn column = columns.get(i);
        DefaultField field = new DefaultField();
        field.setName(column.getDisplayName());
        // The column's data type is used for both the native and the derived type of the field
        field.setNativeDataType(column.getDataType());
        field.setDerivedDataType(column.getDataType());
        field.setDataTypeDescriptor(ParserHelper.hiveTypeToDescriptor(column.getDataType()));
        // strip the precisionScale and assign to the field property
        setPrecisionAndScale(field);

        // Add sample values: the i-th entry of each row belongs to this column; nulls are skipped
        for (List<Object> row : rows) {
            Object value = row.get(i);
            if (value != null) {
                field.getSampleValues().add(value.toString());
            }
        }
        fields.add(field);
    }
    schema.setFields(fields);
    return schema;
}
Also used : DefaultField(com.thinkbiganalytics.discovery.model.DefaultField) Field(com.thinkbiganalytics.discovery.schema.Field) DefaultHiveSchema(com.thinkbiganalytics.discovery.model.DefaultHiveSchema) ArrayList(java.util.ArrayList) List(java.util.List) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)

Aggregations

QueryResultColumn (com.thinkbiganalytics.discovery.schema.QueryResultColumn): 6, DefaultQueryResultColumn (com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn): 4, ArrayList (java.util.ArrayList): 3, HashMap (java.util.HashMap): 3, LinkedHashMap (java.util.LinkedHashMap): 2, DefaultField (com.thinkbiganalytics.discovery.model.DefaultField): 1, DefaultHiveSchema (com.thinkbiganalytics.discovery.model.DefaultHiveSchema): 1, DefaultQueryResult (com.thinkbiganalytics.discovery.model.DefaultQueryResult): 1, Field (com.thinkbiganalytics.discovery.schema.Field): 1, ResultSet (java.sql.ResultSet): 1, SQLException (java.sql.SQLException): 1, List (java.util.List): 1, Map (java.util.Map): 1, Nonnull (javax.annotation.Nonnull): 1, DataType (org.apache.spark.sql.types.DataType): 1, MetadataBuilder (org.apache.spark.sql.types.MetadataBuilder): 1, StructField (org.apache.spark.sql.types.StructField): 1, StructType (org.apache.spark.sql.types.StructType): 1, DataRetrievalFailureException (org.springframework.dao.DataRetrievalFailureException): 1, RowCallbackHandler (org.springframework.jdbc.core.RowCallbackHandler): 1