Search in sources :

Example 6 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class HiveBatchSource method initialize.

/**
 * Prepares the record transformer used by this source for the current run.
 *
 * <p>The Hive table schema is deserialized from the runtime argument keyed by the configured
 * database table. The CDAP schema is taken from the configuration when one is present, and is
 * otherwise derived from the Hive table schema.
 *
 * @param context runtime context whose arguments carry the serialized Hive table schema
 * @throws Exception declared by the signature; propagated from the superclass call or the
 *     schema handling below
 */
@Override
public void initialize(BatchRuntimeContext context) throws Exception {
    super.initialize(context);
    HCatSchema hCatSchema = GSON.fromJson(context.getArguments().get(config.getDBTable()), HCatSchema.class);
    // When the user supplied no schema, fall back to converting the hive table's schema to a cdap schema.
    Schema schema = config.schema != null
        ? config.getSchema()
        : HiveSchemaConverter.toSchema(hCatSchema);
    hCatRecordTransformer = new HCatRecordTransformer(hCatSchema, schema);
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) Schema(io.cdap.cdap.api.data.schema.Schema)

Example 7 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class ValueMapperTest method testMappingValidation.

/**
 * Checks that configuring a ValueMapper with the mapping
 * "designationid:designation_lookup_table" against an all-string input schema raises a
 * ValidationException carrying exactly one failure with exactly one cause, and that the cause
 * names the stage, the mapping config property and the offending mapping element.
 */
@Test
public void testMappingValidation() throws Exception {
    Schema stringSchema = Schema.of(Schema.Type.STRING);
    Schema inputSchema = Schema.recordOf(
        "sourceRecord",
        Schema.Field.of(ID, stringSchema),
        Schema.Field.of(NAME, stringSchema),
        Schema.Field.of(SALARY, stringSchema),
        Schema.Field.of(DESIGNATIONID, stringSchema));
    ValueMapper.Config mapperConfig =
        new ValueMapper.Config("designationid:designation_lookup_table", "designationid:DEFAULTID");
    MockPipelineConfigurer pipelineConfigurer = new MockPipelineConfigurer(inputSchema);

    // The cause the single validation failure is expected to report.
    Cause expectedCause = new Cause();
    expectedCause.addAttribute(STAGE, MOCK_STAGE);
    expectedCause.addAttribute(CauseAttributes.STAGE_CONFIG, ValueMapper.Config.MAPPING);
    expectedCause.addAttribute(CauseAttributes.CONFIG_ELEMENT, "designationid:designation_lookup_table");

    try {
        new ValueMapper(mapperConfig).configurePipeline(pipelineConfigurer);
        // Reaching this line means validation did not reject the configuration.
        Assert.fail();
    } catch (ValidationException validationError) {
        Assert.assertEquals(1, validationError.getFailures().size());
        Assert.assertEquals(1, validationError.getFailures().get(0).getCauses().size());
        Assert.assertEquals(expectedCause, validationError.getFailures().get(0).getCauses().get(0));
    }
}
Also used : ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) MockPipelineConfigurer(io.cdap.cdap.etl.mock.common.MockPipelineConfigurer) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) Schema(io.cdap.cdap.api.data.schema.Schema) Cause(io.cdap.cdap.etl.api.validation.ValidationFailure.Cause) Test(org.junit.Test)

Example 8 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class XMLParserTest method testXpathArray.

/**
 * Feeds a bookstore document containing two books through an XMLParser configured with the
 * "//book/@category" and "//book/title" XPaths and the "Exit on error" option, and expects a
 * single emitted record holding the first book's category and title.
 */
@Test
public void testXpathArray() throws Exception {
    Schema nullableString = Schema.nullableOf(Schema.of(Schema.Type.STRING));
    Schema schema = Schema.recordOf(
        "record",
        Schema.Field.of("category", nullableString),
        Schema.Field.of("title", nullableString));

    XMLParser.Config parserConfig = new XMLParser.Config(
        "body",
        "UTF-8",
        "category://book/@category,title://book/title",
        "category:string,title:string",
        "Exit on error");
    Transform<StructuredRecord, StructuredRecord> parser = new XMLParser(parserConfig);
    parser.initialize(new MockTransformContext());
    MockEmitter<StructuredRecord> emitter = new MockEmitter<>();

    String bookstoreXml = "<bookstore><book category=\"cooking\"><title lang=\"en\">Everyday Italian</title>"
        + "<author>Giada De Laurentiis</author><year>2005</year><price>30.00</price></book>"
        + "<book category=\"children\"><title lang=\"en\">Harry Potter</title><author>J K. Rowling</author>"
        + "<year>2005</year><price>29.99</price></book></bookstore>";
    StructuredRecord inputRecord = StructuredRecord.builder(INPUT)
        .set("offset", 1)
        .set("body", bookstoreXml)
        .build();
    parser.transform(inputRecord, emitter);

    StructuredRecord firstBook = StructuredRecord.builder(schema)
        .set("category", "cooking")
        .set("title", "Everyday Italian")
        .build();
    List<StructuredRecord> expected = ImmutableList.of(firstBook);
    Assert.assertEquals(expected, emitter.getEmitted());
}
Also used : MockTransformContext(io.cdap.cdap.etl.mock.transform.MockTransformContext) MockEmitter(io.cdap.cdap.etl.mock.common.MockEmitter) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 9 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class XMLParserTest method testXpathWithMultipleElements.

/**
 * Runs three single-book documents through an XMLParser that extracts category, title and
 * subcategory for any book, and year/price only through XPaths carrying a
 * {@code [price>35.00]} predicate.
 *
 * <p>The expected record for the 30.00 book leaves price and year unset (presumably because the
 * predicate does not match it), while the 49.99 and 39.95 books are expected to carry both
 * values. The subcategory field is expected to hold the element's XML markup as a string.
 */
@Test
public void testXpathWithMultipleElements() throws Exception {
    Schema nullableString = Schema.nullableOf(Schema.of(Schema.Type.STRING));
    Schema schema = Schema.recordOf(
        "record",
        Schema.Field.of("category", nullableString),
        Schema.Field.of("title", nullableString),
        Schema.Field.of("price", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
        Schema.Field.of("year", Schema.nullableOf(Schema.of(Schema.Type.INT))),
        Schema.Field.of("subcategory", nullableString));

    XMLParser.Config parserConfig = new XMLParser.Config(
        "body",
        "UTF-16 (Unicode with byte-order mark)",
        "category://book/@category,title://book/title,year:/bookstore/book[price>35.00]/year,"
            + "price:/bookstore/book[price>35.00]/price,subcategory://book/subcategory",
        "category:string,title:string,price:double,year:int,subcategory:string",
        "Write to error dataset");
    Transform<StructuredRecord, StructuredRecord> parser = new XMLParser(parserConfig);
    parser.initialize(new MockTransformContext());
    MockEmitter<StructuredRecord> emitter = new MockEmitter<>();

    // Book priced at 30.00: the expected record has no price or year.
    StructuredRecord cookingInput = StructuredRecord.builder(INPUT)
        .set("offset", 1)
        .set("body", "<bookstore><book category=\"cooking\"><subcategory><type>Continental</type></subcategory>"
            + "<title lang=\"en\">Everyday Italian</title><author>Giada De Laurentiis</author><year>2005</year>"
            + "<price>30.00</price></book></bookstore>")
        .build();
    parser.transform(cookingInput, emitter);
    List<StructuredRecord> expected = ImmutableList.of(
        StructuredRecord.builder(schema)
            .set("category", "cooking")
            .set("title", "Everyday Italian")
            .set("subcategory", "<subcategory><type>Continental</type></subcategory>")
            .build());
    Assert.assertEquals(expected, emitter.getEmitted());
    emitter.clear();

    // Book priced at 49.99: price and year are expected in the output.
    StructuredRecord childrenInput = StructuredRecord.builder(INPUT)
        .set("offset", 2)
        .set("body", "<bookstore><book category=\"children\"><subcategory><type>Series</type></subcategory>"
            + "<title lang=\"en\">Harry Potter</title><author>J K. Rowling</author><year>2005</year><price>49.99</price>"
            + "</book></bookstore>")
        .build();
    parser.transform(childrenInput, emitter);
    expected = ImmutableList.of(
        StructuredRecord.builder(schema)
            .set("category", "children")
            .set("title", "Harry Potter")
            .set("price", 49.99d)
            .set("year", 2005)
            .set("subcategory", "<subcategory><type>Series</type></subcategory>")
            .build());
    Assert.assertEquals(expected, emitter.getEmitted());
    emitter.clear();

    // Book priced at 39.95: price and year are expected in the output.
    StructuredRecord webInput = StructuredRecord.builder(INPUT)
        .set("offset", 3)
        .set("body", "<bookstore><book category=\"web\"><subcategory><type>Basics</type></subcategory>"
            + "<title lang=\"en\">Learning XML</title><author>Erik T. Ray</author><year>2003</year><price>39.95</price>"
            + "</book></bookstore>")
        .build();
    parser.transform(webInput, emitter);
    expected = ImmutableList.of(
        StructuredRecord.builder(schema)
            .set("category", "web")
            .set("title", "Learning XML")
            .set("price", 39.95d)
            .set("year", 2003)
            .set("subcategory", "<subcategory><type>Basics</type></subcategory>")
            .build());
    Assert.assertEquals(expected, emitter.getEmitted());
    emitter.clear();
}
Also used : MockTransformContext(io.cdap.cdap.etl.mock.transform.MockTransformContext) MockEmitter(io.cdap.cdap.etl.mock.common.MockEmitter) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) Test(org.junit.Test)

Example 10 with Schema

use of io.cdap.cdap.api.data.schema.Schema in project hydrator-plugins by cdapio.

the class XMLParserTest method testInputFieldNotInSchema.

/**
 * Checks that configuring an XMLParser whose input field is "x" (presumably absent from the
 * INPUT schema, per the test name) records exactly one validation failure with exactly one
 * cause, and that the cause points at the parser's input config property.
 *
 * <p>Unlike tests that expect a thrown exception, the failures are read from the stage's
 * FailureCollector after configurePipeline returns.
 */
@Test
public void testInputFieldNotInSchema() throws Exception {
    XMLParser.Config config = new XMLParser.Config(
        "x",
        "UTF-8",
        "title:/book/title,author:/book/author,year:/book/year",
        "title:string,author:string,year:string",
        "Write to error dataset");
    MockPipelineConfigurer configurer = new MockPipelineConfigurer(INPUT);
    new XMLParser(config).configurePipeline(configurer);

    FailureCollector collector = configurer.getStageConfigurer().getFailureCollector();
    Assert.assertEquals(1, collector.getValidationFailures().size());
    Assert.assertEquals(1, collector.getValidationFailures().get(0).getCauses().size());

    // The only cause should identify the input field property as the faulty config.
    Cause expectedCause = new Cause();
    expectedCause.addAttribute(CauseAttributes.STAGE_CONFIG, XMLParser.Config.INPUT);
    Assert.assertEquals(expectedCause, collector.getValidationFailures().get(0).getCauses().get(0));
}
Also used : MockPipelineConfigurer(io.cdap.cdap.etl.mock.common.MockPipelineConfigurer) Schema(io.cdap.cdap.api.data.schema.Schema) Cause(io.cdap.cdap.etl.api.validation.ValidationFailure.Cause) FailureCollector(io.cdap.cdap.etl.api.FailureCollector) Test(org.junit.Test)

Aggregations

Schema (io.cdap.cdap.api.data.schema.Schema)1135 Test (org.junit.Test)664 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)432 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)177 Table (io.cdap.cdap.api.dataset.table.Table)169 ApplicationManager (io.cdap.cdap.test.ApplicationManager)148 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)141 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)133 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)130 ArrayList (java.util.ArrayList)114 HashSet (java.util.HashSet)113 HashMap (java.util.HashMap)101 WorkflowManager (io.cdap.cdap.test.WorkflowManager)96 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)81 IOException (java.io.IOException)69 FailureCollector (io.cdap.cdap.etl.api.FailureCollector)67 MockPipelineConfigurer (io.cdap.cdap.etl.mock.common.MockPipelineConfigurer)56 Map (java.util.Map)56 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)47 ReflectionSchemaGenerator (io.cdap.cdap.internal.io.ReflectionSchemaGenerator)46