Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class MockJoiner, method configurePipeline.
@Override
public void configurePipeline(MultiInputPipelineConfigurer pipelineConfigurer) {
  MultiInputStageConfigurer stageConfigurer = pipelineConfigurer.getMultiInputStageConfigurer();
  // Derive the joiner's output schema from the schemas of all input stages,
  // then validate the stage configuration.
  Map<String, Schema> inputSchemas = stageConfigurer.getInputSchemas();
  stageConfigurer.setOutputSchema(getOutputSchema(inputSchemas));
  config.validateConfig();
}
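The getOutputSchema helper is defined elsewhere in MockJoiner and not shown above. As a rough illustration of the shape such a helper can take, the sketch below simply unions the fields of every input schema into one output record; the record name "joined" and the union strategy are assumptions, not the actual MockJoiner logic.

// Hypothetical sketch only: unions all input fields into a single record schema.
// Duplicate field names across inputs would need conflict handling this omits.
private static Schema getOutputSchema(Map<String, Schema> inputSchemas) {
  List<Schema.Field> fields = new ArrayList<>();
  for (Schema inputSchema : inputSchemas.values()) {
    fields.addAll(inputSchema.getFields());
  }
  return Schema.recordOf("joined", fields);
}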
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class MockRuntimeDatasetSink, method readOutput.
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      // Each row stores the record's schema and the record itself as JSON strings.
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
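In a pipeline test, this reader is typically handed a dataset manager obtained from the test base. A minimal usage sketch, in which the dataset name "sinkDataset", the call to getDataset (as provided by CDAP's TestBase), and the expected record count are assumptions:

// Hypothetical test usage; "sinkDataset" is an assumed dataset name.
DataSetManager<Table> sinkManager = getDataset("sinkDataset");
List<StructuredRecord> outputRecords = MockRuntimeDatasetSink.readOutput(sinkManager);
Assert.assertEquals(1, outputRecords.size());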
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class MockSink, method readOutput.
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  // Unlike MockRuntimeDatasetSink, flush first so pending writes are visible to the scan.
  tableManager.flush();
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
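Both readers assume a write path that stores one record per row, with the schema JSON under SCHEMA_COL and the record JSON under RECORD_COL. A minimal sketch of what such a writer could look like; the method name writeRecord, its row-key parameter, and the exact column layout are assumptions, not the actual MockSink code:

// Hypothetical sketch of the write path the readers above assume.
private static void writeRecord(Table table, byte[] rowKey, StructuredRecord record) throws IOException {
  Put put = new Put(rowKey);
  put.add(SCHEMA_COL, record.getSchema().toString());   // schema serialized as JSON
  put.add(RECORD_COL, StructuredRecordStringConverter.toJsonString(record));
  table.put(put);
}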
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class SchemaTest, method testParseSQLWithWhitespace.
@Test
public void testParseSQLWithWhitespace() throws IOException {
  String schemaStr = "map_field map< string , int > not null,\n"
    + "arr_field array< record< x:int , y:double >\t> not null";
  // Components of complex types are nullable by default; "not null" applies to
  // the top-level fields, so the map and array themselves are non-nullable.
  Schema expectedSchema = Schema.recordOf(
    "rec",
    Schema.Field.of("map_field",
                    Schema.mapOf(Schema.nullableOf(Schema.of(Schema.Type.STRING)),
                                 Schema.nullableOf(Schema.of(Schema.Type.INT)))),
    Schema.Field.of("arr_field",
                    Schema.arrayOf(Schema.nullableOf(
                      Schema.recordOf("rec1",
                                      Schema.Field.of("x", Schema.nullableOf(Schema.of(Schema.Type.INT))),
                                      Schema.Field.of("y", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))))))));
  Assert.assertEquals(expectedSchema, Schema.parseSQL(schemaStr));
}
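A smaller example may make the nullability rule behind the expected schema easier to see. A sketch, assuming the top-level record that parseSQL produces is named "rec" as in the test above:

// Fields are nullable by default; "not null" makes a field non-nullable.
Schema parsed = Schema.parseSQL("a int, b string not null");
Assert.assertEquals(
  Schema.recordOf("rec",
                  Schema.Field.of("a", Schema.nullableOf(Schema.of(Schema.Type.INT))),
                  Schema.Field.of("b", Schema.of(Schema.Type.STRING))),
  parsed);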
Use of co.cask.cdap.api.data.schema.Schema in project cdap by caskdata.
The class SchemaTest, method testAvroEnumSchema.
@Test
public void testAvroEnumSchema() throws Exception {
  org.apache.avro.Schema schema = org.apache.avro.Schema.createEnum(
    "UserInterests", "Describes interests of user", "org.example.schema",
    ImmutableList.of("CRICKET", "BASEBALL"));
  Schema parsedSchema = Schema.parseJson(schema.toString());
  Assert.assertEquals(ImmutableSet.of("CRICKET", "BASEBALL"), parsedSchema.getEnumValues());
}
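The same enum schema can also be built directly with the CDAP API rather than parsed from Avro JSON. A minimal sketch, assuming Schema.enumWith accepts the values as varargs:

// Direct construction; should yield the same enum values as the parsed schema.
Schema enumSchema = Schema.enumWith("CRICKET", "BASEBALL");
Assert.assertEquals(ImmutableSet.of("CRICKET", "BASEBALL"), enumSchema.getEnumValues());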