Search in sources :

Example 16 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class AbstractDBConnector method getTableSchema.

/**
 * Builds a record schema named "output" for a table from the JDBC column metadata.
 *
 * <p>One field is produced per row returned by {@code DatabaseMetaData.getColumns}. A column
 * whose IS_NULLABLE value is "YES" gets a nullable schema. A column is treated as signed
 * unless its type name contains "unsigned" (case-insensitive).
 *
 * @param connection open connection used to read the metadata; it is not closed here
 * @param database catalog passed to the metadata lookup
 * @param schema schema pattern passed to the metadata lookup
 * @param table table name pattern passed to the metadata lookup
 * @return record schema containing one field per column reported by the driver
 * @throws SQLException if the metadata cannot be read
 */
protected Schema getTableSchema(Connection connection, String database, String schema, String table) throws SQLException {
    List<Schema.Field> fields = new ArrayList<>();
    // Close the metadata ResultSet deterministically instead of leaking it until the connection closes.
    try (ResultSet columns = connection.getMetaData().getColumns(database, schema, table, null)) {
        while (columns.next()) {
            int sqlType = columns.getInt(RESULTSET_COLUMN_DATA_TYPE);
            String typeName = columns.getString(RESULTSET_COLUMN_TYPE_NAME);
            int scale = columns.getInt(RESULTSET_COLUMN_DECIMAL_DIGITS);
            int precision = columns.getInt(RESULTSET_COLUMN_COLUMN_SIZE);
            String columnName = columns.getString(RESULTSET_COLUMN_COLUMN_NAME);
            // Locale.ROOT keeps the match stable in locales with special casing rules
            // (e.g. Turkish, where 'I' lower-cases to a dotless 'ı').
            boolean isSigned = !typeName.toLowerCase(java.util.Locale.ROOT).contains("unsigned");
            // NOTE(review): the meaning of the trailing 'true' flag is defined by getSchema, not visible here.
            Schema columnSchema = getSchema(sqlType, typeName, scale, precision, columnName, isSigned, true);
            String isNullable = columns.getString(RESULTSET_COLUMN_IS_NULLABLE);
            if ("YES".equals(isNullable)) {
                columnSchema = Schema.nullableOf(columnSchema);
            }
            fields.add(Schema.Field.of(columnName, columnSchema));
        }
    }
    return Schema.recordOf("output", fields);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) ResultSet(java.sql.ResultSet) ArrayList(java.util.ArrayList)

Example 17 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class AbstractDBConnector method generateSpec.

/**
 * Generates the connector spec for the path in the request.
 *
 * <p>When the path does not name a table, the spec is returned without a schema. Otherwise
 * the database (falling back to the connection's catalog when the path has none) and the
 * schema are validated, and the table's schema is attached to the spec.
 *
 * @param connectorContext connector context (not used by this implementation)
 * @param request spec request carrying the path to describe
 * @return the built connector spec
 * @throws IOException if a {@link SQLException} occurs while talking to the database
 */
@Override
public ConnectorSpec generateSpec(ConnectorContext connectorContext, ConnectorSpecRequest request) throws IOException {
    DBConnectorPath path = getDBConnectorPath(request.getPath());
    try (Connection conn = getConnection(path)) {
        ConnectorSpec.Builder builder = ConnectorSpec.builder();
        setConnectorSpec(request, path, builder);

        String tableName = path.getTable();
        if (tableName == null) {
            // Nothing more specific than a database/schema was requested.
            return builder.build();
        }

        String databaseName = path.getDatabase();
        if (databaseName != null) {
            validateDatabase(databaseName, conn);
        } else {
            databaseName = conn.getCatalog();
        }

        String schemaName = path.getSchema();
        validateSchema(databaseName, schemaName, conn);

        Schema tableSchema = getTableSchema(conn, databaseName, schemaName, tableName);
        return builder.setSchema(tableSchema).build();
    } catch (SQLException e) {
        String message = String.format("Failed to generate spec for path %s. Error: %s.", request.getPath(), ExceptionUtils.getRootCauseMessage(e));
        throw new IOException(message, e);
    }
}
Also used : SQLException(java.sql.SQLException) Schema(io.cdap.cdap.api.data.schema.Schema) Connection(java.sql.Connection) ConnectorSpec(io.cdap.cdap.etl.api.connector.ConnectorSpec) IOException(java.io.IOException)

Example 18 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class MongoDBBatchSource method configurePipeline.

/**
 * Validates the configured schema with {@code BSONConverter.validateSchema} and registers it
 * as the output schema of this stage.
 *
 * @param pipelineConfigurer configurer whose stage configurer receives the output schema
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    super.configurePipeline(pipelineConfigurer);

    Schema outputSchema = config.getSchema();
    // Validate before publishing the schema to the stage configurer.
    BSONConverter.validateSchema(outputSchema);

    pipelineConfigurer.getStageConfigurer().setOutputSchema(outputSchema);
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 19 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class TextInputFormatProvider method validate.

/**
 * Validates the schema configured for the 'text' format.
 *
 * <p>Rules enforced:
 * <ul>
 *   <li>a 'body' field must exist and be of type string (nullable allowed);</li>
 *   <li>an 'offset' field is optional, but if present must be of type long (nullable allowed);</li>
 *   <li>no fields beyond 'body', the optional 'offset', and the optional configured path field
 *       may be present (checked by field count).</li>
 * </ul>
 * Validation is skipped when the schema property contains a macro.
 *
 * @throws IllegalArgumentException if any of the rules above is violated
 */
@Override
protected void validate() {
    if (conf.containsMacro("schema")) {
        return;
    }
    String pathField = conf.getPathField();
    Schema schema = conf.getSchema();
    // text must contain 'body' as type 'string'.
    // it can optionally contain a 'offset' field of type 'long'
    // it can optionally contain a path field of type 'string'
    Schema.Field offsetField = schema.getField("offset");
    if (offsetField != null) {
        Schema offsetSchema = offsetField.getSchema();
        Schema.Type offsetType = offsetSchema.isNullable() ? offsetSchema.getNonNullable().getType() : offsetSchema.getType();
        if (offsetType != Schema.Type.LONG) {
            // Locale.ROOT keeps the reported type name stable regardless of the JVM default locale.
            throw new IllegalArgumentException(String.format("The 'offset' field must be of type 'long', but found '%s'", offsetType.name().toLowerCase(java.util.Locale.ROOT)));
        }
    }
    Schema.Field bodyField = schema.getField("body");
    if (bodyField == null) {
        throw new IllegalArgumentException("The schema for the 'text' format must have a field named 'body'");
    }
    Schema bodySchema = bodyField.getSchema();
    Schema.Type bodyType = bodySchema.isNullable() ? bodySchema.getNonNullable().getType() : bodySchema.getType();
    if (bodyType != Schema.Type.STRING) {
        throw new IllegalArgumentException(String.format("The 'body' field must be of type 'string', but found '%s'", bodyType.name().toLowerCase(java.util.Locale.ROOT)));
    }
    // fields should be body (required), offset (optional), [pathfield] (optional)
    boolean expectOffset = offsetField != null;
    boolean expectPath = pathField != null;
    int numExpectedFields = 1;
    if (expectOffset) {
        numExpectedFields++;
    }
    if (expectPath) {
        numExpectedFields++;
    }
    int numFields = schema.getFields().size();
    if (numFields > numExpectedFields) {
        String expectedFields;
        if (expectOffset && expectPath) {
            expectedFields = String.format("'offset', 'body', and '%s' fields", pathField);
        } else if (expectOffset) {
            expectedFields = "'offset' and 'body' fields";
        } else if (expectPath) {
            expectedFields = String.format("'body' and '%s' fields", pathField);
        } else {
            expectedFields = "'body' field";
        }
        // Count extras against the fields actually expected for this schema; measuring against the
        // theoretical maximum under-reports (even "0 other field") when 'offset' is absent.
        int numExtraFields = numFields - numExpectedFields;
        throw new IllegalArgumentException(String.format("The schema for the 'text' format must only contain the %s, but found %d other field%s", expectedFields, numExtraFields, numExtraFields > 1 ? "s" : ""));
    }
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema)

Example 20 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class TextInputFormatProvider method validate.

/**
 * Validates the schema configured for the 'text' format, reporting problems to the
 * failure collector obtained from the context.
 *
 * <p>Rules checked:
 * <ul>
 *   <li>a 'body' field must exist and be of type string (nullable allowed);</li>
 *   <li>an 'offset' field is optional, but if present must be of type long (nullable allowed);</li>
 *   <li>when the schema has more fields than expected, every field other than 'body', 'offset'
 *       and the configured path field is reported as an additional field.</li>
 * </ul>
 * Validation is skipped when the schema property contains a macro. If the schema itself cannot
 * be obtained, the failure is recorded and the collector's exception is thrown immediately;
 * all other problems are only added to the collector.
 *
 * @param context format context supplying the failure collector
 */
@Override
public void validate(FormatContext context) {
    if (conf.containsMacro(TextConfig.NAME_SCHEMA)) {
        return;
    }
    FailureCollector collector = context.getFailureCollector();
    Schema schema;
    try {
        schema = conf.getSchema();
    } catch (Exception e) {
        // Without a schema none of the checks below can run, so fail fast.
        collector.addFailure(e.getMessage(), null).withConfigProperty(TextConfig.NAME_SCHEMA).withStacktrace(e.getStackTrace());
        throw collector.getOrThrowException();
    }
    String pathField = conf.getPathField();
    // text must contain 'body' as type 'string'.
    // it can optionally contain a 'offset' field of type 'long'
    // it can optionally contain a path field of type 'string'
    Schema.Field offsetField = schema.getField(TextConfig.NAME_OFFSET);
    if (offsetField != null) {
        Schema offsetSchema = offsetField.getSchema();
        offsetSchema = offsetSchema.isNullable() ? offsetSchema.getNonNullable() : offsetSchema;
        Schema.Type offsetType = offsetSchema.getType();
        if (offsetType != Schema.Type.LONG) {
            collector.addFailure(String.format("The 'offset' field is of unexpected type '%s'.", offsetSchema.getDisplayName()), "Change type to 'long'.").withOutputSchemaField(TextConfig.NAME_OFFSET);
        }
    }
    Schema.Field bodyField = schema.getField(TextConfig.NAME_BODY);
    if (bodyField == null) {
        collector.addFailure("The schema for the 'text' format must have a field named 'body'.", null).withConfigProperty(TextConfig.NAME_SCHEMA);
    } else {
        Schema bodySchema = bodyField.getSchema();
        bodySchema = bodySchema.isNullable() ? bodySchema.getNonNullable() : bodySchema;
        Schema.Type bodyType = bodySchema.getType();
        if (bodyType != Schema.Type.STRING) {
            // Message mirrors the 'offset' failure above (no stray trailing quote).
            collector.addFailure(String.format("The 'body' field is of unexpected type '%s'.", bodySchema.getDisplayName()), "Change type to 'string'.").withOutputSchemaField(TextConfig.NAME_BODY);
        }
    }
    // fields should be body (required), offset (optional), [pathfield] (optional)
    boolean expectOffset = offsetField != null;
    boolean expectPath = pathField != null;
    int numExpectedFields = 1;
    if (expectOffset) {
        numExpectedFields++;
    }
    if (expectPath) {
        numExpectedFields++;
    }
    int numFields = schema.getFields().size();
    if (numFields > numExpectedFields) {
        // The description of the allowed fields does not depend on the field being inspected,
        // so build it once before the loop.
        String expectedFields;
        if (expectOffset && expectPath) {
            expectedFields = String.format("'offset', 'body', and '%s' fields", pathField);
        } else if (expectOffset) {
            expectedFields = "'offset' and 'body' fields";
        } else if (expectPath) {
            expectedFields = String.format("'body' and '%s' fields", pathField);
        } else {
            expectedFields = "'body' field";
        }
        for (Schema.Field field : schema.getFields()) {
            String fieldName = field.getName();
            if (fieldName.equals(TextConfig.NAME_BODY) || (expectPath && fieldName.equals(pathField)) || fieldName.equals(TextConfig.NAME_OFFSET)) {
                continue;
            }
            // expectedFields already quotes each field name, so it is not wrapped in quotes again.
            collector.addFailure(String.format("The schema for the 'text' format must only contain the %s.", expectedFields), String.format("Remove additional field '%s'.", fieldName)).withOutputSchemaField(fieldName);
        }
    }
}
Also used : Schema(io.cdap.cdap.api.data.schema.Schema) IOException(java.io.IOException) FailureCollector(io.cdap.cdap.etl.api.FailureCollector)

Aggregations

Schema (io.cdap.cdap.api.data.schema.Schema) 1135, Test (org.junit.Test) 664, StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord) 432, ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage) 177, Table (io.cdap.cdap.api.dataset.table.Table) 169, ApplicationManager (io.cdap.cdap.test.ApplicationManager) 148, ApplicationId (io.cdap.cdap.proto.id.ApplicationId) 141, AppRequest (io.cdap.cdap.proto.artifact.AppRequest) 133, ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig) 130, ArrayList (java.util.ArrayList) 114, HashSet (java.util.HashSet) 113, HashMap (java.util.HashMap) 101, WorkflowManager (io.cdap.cdap.test.WorkflowManager) 96, KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable) 81, IOException (java.io.IOException) 69, FailureCollector (io.cdap.cdap.etl.api.FailureCollector) 67, MockPipelineConfigurer (io.cdap.cdap.etl.mock.common.MockPipelineConfigurer) 56, Map (java.util.Map) 56, ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin) 47, ReflectionSchemaGenerator (io.cdap.cdap.internal.io.ReflectionSchemaGenerator) 46