Search in sources :

Example 1 with DefaultQueryResultColumn

use of com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn in project kylo by Teradata.

the class SaveDataSetStage method getDataSet.

/**
 * Returns the data set of the given transformation result, prepared for the requested output format.
 *
 * <p>For any format other than {@code "orc"} the data set is returned untouched. For {@code "orc"} every column is
 * selected again and, where the field name produced by {@code QueryResultRowTransform} for that format differs from
 * the schema name, the column is aliased to that field name.</p>
 *
 * @param transform the transformation result that supplies the data set
 * @return the original data set, or a projection of it with renamed columns for ORC
 */
private DataSet getDataSet(@Nonnull final TransformResult transform) {
    final DataSet source = transform.getDataSet();

    // Only ORC output needs its column names adjusted
    if (request.getFormat() == null || !request.getFormat().equals("orc")) {
        return source;
    }

    final StructType schema = source.schema();
    final int fieldCount = schema.size();
    final Column[] selection = new Column[fieldCount];
    final DefaultQueryResultColumn[] orcColumns = new QueryResultRowTransform(schema, "orc").columns();

    for (int index = 0; index < fieldCount; ++index) {
        final String orcName = orcColumns[index].getField();
        final String schemaName = schema.apply(index).name();
        final boolean sameName = orcName.equals(schemaName);
        // Alias the column only when the ORC field name is different from the schema name
        selection[index] = sameName ? new Column(schemaName) : new Column(schemaName).as(orcName);
    }
    return source.select(selection);
}
Also used : StructType(org.apache.spark.sql.types.StructType) DataSet(com.thinkbiganalytics.spark.DataSet) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) Column(org.apache.spark.sql.Column) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn)

Example 2 with DefaultQueryResultColumn

use of com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn in project kylo by Teradata.

the class SparkFileSchemaParserServiceTest method newColumn.

/**
 * Builds a query result column for use in tests.
 *
 * <p>The field and display name are both set to {@code name}; the table and database names are fixed to
 * {@code "table"} and {@code "database"}.</p>
 *
 * @param name     the value used for both the field and the display name
 * @param dataType the data type to assign to the column
 * @return the populated column
 */
private QueryResultColumn newColumn(String name, String dataType) {
    final QueryResultColumn result = new DefaultQueryResultColumn();

    // Caller-supplied name is used for both the field and its display name
    result.setField(name);
    result.setDisplayName(name);

    // Fixed table name shared by every column created here
    result.setTableName("table");
    result.setDataType(dataType);

    // Fixed database name shared by every column created here
    result.setDatabaseName("database");

    return result;
}
Also used : DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)

Example 3 with DefaultQueryResultColumn

use of com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn in project kylo by Teradata.

the class SqlTransformStage method extractSchema.

/**
 * Derives the Spark SQL schema from JDBC result set metadata and stores the matching query result columns on the
 * transform result.
 *
 * <p>The display name of a column is its label with everything up to the last {@code '.'} removed. When the same
 * display name occurs more than once, later occurrences get a numeric suffix ({@code name}, {@code name1},
 * {@code name2}, ...).</p>
 *
 * @param rsmd   the metadata describing the columns of the result set
 * @param result the transform result whose columns are replaced with the extracted column descriptions
 * @return a struct type with one field per result set column, named by the column label
 * @throws SQLException if the metadata cannot be read
 */
@Nonnull
private StructType extractSchema(@Nonnull final ResultSetMetaData rsmd, @Nonnull final TransformResult result) throws SQLException {
    final int columnCount = rsmd.getColumnCount();
    final List<QueryResultColumn> resultColumns = new ArrayList<>(columnCount);
    // Number of earlier columns that already used each display name
    final Map<String, Integer> displayNameUsage = new HashMap<>();
    final StructField[] structFields = new StructField[columnCount];

    // JDBC metadata is addressed with 1-based indexes
    for (int jdbcIndex = 1; jdbcIndex <= columnCount; ++jdbcIndex) {
        final String label = rsmd.getColumnLabel(jdbcIndex);
        final String name = rsmd.getColumnName(jdbcIndex);
        final int sqlType = rsmd.getColumnType(jdbcIndex);
        final String sqlTypeName = rsmd.getColumnTypeName(jdbcIndex);
        final int precision = rsmd.getPrecision(jdbcIndex);
        final int scale = rsmd.getScale(jdbcIndex);
        final boolean nullable = rsmd.isNullable(jdbcIndex) != ResultSetMetaData.columnNoNulls;
        final boolean signed = rsmd.isSigned(jdbcIndex);

        final DefaultQueryResultColumn queryColumn = new DefaultQueryResultColumn();
        queryColumn.setField(name);
        queryColumn.setHiveColumnLabel(label);

        // Strip any qualifier in front of the last '.' from the label
        final String displayName;
        if (StringUtils.contains(label, ".")) {
            displayName = StringUtils.substringAfterLast(label, ".");
        } else {
            displayName = label;
        }

        // First use of a display name has no suffix; repeats are numbered from 1
        final Integer previousUsage = displayNameUsage.get(displayName);
        final int usage = (previousUsage == null) ? 0 : previousUsage + 1;
        displayNameUsage.put(displayName, usage);
        final String suffix = (usage > 0) ? Integer.toString(usage) : "";
        queryColumn.setDisplayName(displayName + suffix);

        // NOTE(review): this takes the text after the last '.' of the column name, which looks like the column part
        // rather than the table part - confirm that this is intended
        queryColumn.setTableName(StringUtils.substringAfterLast(name, "."));
        queryColumn.setDataType(ParserHelper.sqlTypeToHiveType(sqlType));
        queryColumn.setNativeDataType(sqlTypeName);

        // "precision,scale" when a scale is reported, otherwise just the precision when one is reported
        String precisionScale = null;
        if (scale != 0) {
            precisionScale = precision + "," + scale;
        } else if (precision != 0) {
            precisionScale = Integer.toString(precision);
        }
        if (precisionScale != null) {
            queryColumn.setPrecisionScale(precisionScale);
        }
        resultColumns.add(queryColumn);

        // Prefer the type suggested by the dialect and fall back to the local mapping when it offers none
        final MetadataBuilder fieldMetadata = new MetadataBuilder();
        final Option<DataType> dialectType = dialect.getCatalystType(sqlType, sqlTypeName, precision, fieldMetadata);
        final DataType fieldType = dialectType.isDefined() ? dialectType.get() : getCatalystType(sqlType, precision, scale, signed);

        structFields[jdbcIndex - 1] = new StructField(label, fieldType, nullable, fieldMetadata.build());
    }

    result.setColumns(resultColumns);
    return new StructType(structFields);
}
Also used : MetadataBuilder(org.apache.spark.sql.types.MetadataBuilder) StructType(org.apache.spark.sql.types.StructType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) StructField(org.apache.spark.sql.types.StructField) DataType(org.apache.spark.sql.types.DataType) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn) Nonnull(javax.annotation.Nonnull)

Example 4 with DefaultQueryResultColumn

use of com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn in project kylo by Teradata.

the class QueryRunner method initQueryResult.

/**
 * Initializes the query result with the specified metadata.
 *
 * <p>One column description is created for every column of the result set. The display name of a column is its
 * label with everything up to the last {@code '.'} removed; when the same display name occurs more than once,
 * later occurrences get a numeric suffix ({@code name}, {@code name1}, {@code name2}, ...).</p>
 *
 * @param queryResult the query result to initialize
 * @param rsMetaData  the result set metadata for the query
 * @throws SQLException if the metadata is not available
 */
private void initQueryResult(@Nonnull final DefaultQueryResult queryResult, @Nonnull final ResultSetMetaData rsMetaData) throws SQLException {
    // Read the column count once instead of asking the driver on every loop iteration
    final int columnCount = rsMetaData.getColumnCount();
    final List<QueryResultColumn> columns = new ArrayList<>(Math.max(columnCount, 0));
    // Number of earlier columns that already used each display name
    final Map<String, Integer> displayNameMap = new HashMap<>();

    // JDBC metadata is addressed with 1-based indexes
    for (int i = 1; i <= columnCount; i++) {
        final DefaultQueryResultColumn column = new DefaultQueryResultColumn();

        // The column name is needed for both the field and the table name, so fetch it only once
        final String columnName = rsMetaData.getColumnName(i);
        column.setField(columnName);

        final String columnLabel = rsMetaData.getColumnLabel(i);
        column.setHiveColumnLabel(columnLabel);

        // remove the table name if it exists
        final String displayName = StringUtils.contains(columnLabel, ".") ? StringUtils.substringAfterLast(columnLabel, ".") : columnLabel;

        // First use of a display name has no suffix; repeats are numbered from 1
        final Integer previousCount = displayNameMap.get(displayName);
        final int count = (previousCount == null) ? 0 : previousCount + 1;
        displayNameMap.put(displayName, count);
        column.setDisplayName(displayName + (count > 0 ? Integer.toString(count) : ""));

        // NOTE(review): this takes the text after the last '.' of the column name, which looks like the column part
        // rather than the table part - confirm that this is intended
        column.setTableName(StringUtils.substringAfterLast(columnName, "."));
        column.setDataType(ParserHelper.sqlTypeToHiveType(rsMetaData.getColumnType(i)));
        column.setNativeDataType(rsMetaData.getColumnTypeName(i));

        // "precision,scale" when a scale is reported, otherwise just the precision when one is reported
        final int precision = rsMetaData.getPrecision(i);
        final int scale = rsMetaData.getScale(i);
        if (scale != 0) {
            column.setPrecisionScale(precision + "," + scale);
        } else if (precision != 0) {
            column.setPrecisionScale(Integer.toString(precision));
        }
        columns.add(column);
    }
    queryResult.setColumns(columns);
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) DefaultQueryResultColumn(com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn) QueryResultColumn(com.thinkbiganalytics.discovery.schema.QueryResultColumn)

Aggregations

DefaultQueryResultColumn (com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn): 4; QueryResultColumn (com.thinkbiganalytics.discovery.schema.QueryResultColumn): 3; ArrayList (java.util.ArrayList): 2; HashMap (java.util.HashMap): 2; StructType (org.apache.spark.sql.types.StructType): 2; DataSet (com.thinkbiganalytics.spark.DataSet): 1; LinkedHashMap (java.util.LinkedHashMap): 1; Nonnull (javax.annotation.Nonnull): 1; Column (org.apache.spark.sql.Column): 1; DataType (org.apache.spark.sql.types.DataType): 1; MetadataBuilder (org.apache.spark.sql.types.MetadataBuilder): 1; StructField (org.apache.spark.sql.types.StructField): 1