Search in sources :

Example 6 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeArrayFields_bydjl.

/**
 * Input parent record: {@link NormalizeDoFnTest#inputParentRecord}
 *
 * Tries to normalize the field `b.y.d.j.l`.
 *
 * Expected: a {@link TalendRuntimeException} is thrown, because the element `l` is inside a loop.
 *
 * @throws Exception
 */
@Test(expected = TalendRuntimeException.class)
public void testNormalizeArrayFields_bydjl() throws Exception {
    NormalizeProperties normalizeProps = new NormalizeProperties("test");
    normalizeProps.init();
    normalizeProps.schemaListener.afterSchema();

    // Target the `l` element below `b.y.d.j`
    normalizeProps.columnToNormalize.setValue("b.y.d.j.l");

    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(normalizeProps);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);

    // The result is ignored: the test passes only if the expected exception is raised
    tester.processBundle(inputParentRecord);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 7 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeComplexFields_by.

/**
 * Input parent record: {@link NormalizeDoFnTest#inputParentRecord}
 *
 * Normalizes the complex field `b.y`.
 *
 * Expected: exactly one output record whose content and schema are identical to the input record.
 *
 * @throws Exception
 */
@Test
public void testNormalizeComplexFields_by() throws Exception {
    NormalizeProperties normalizeProps = new NormalizeProperties("test");
    normalizeProps.init();
    normalizeProps.schemaListener.afterSchema();

    // Target the complex field `b.y`
    normalizeProps.columnToNormalize.setValue("b.y");

    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(normalizeProps);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);

    Assert.assertEquals(1, results.size());

    // Records and schemas are compared through their string representations
    GenericRecord onlyResult = (GenericRecord) results.get(0);
    String wantedContent = inputParentRecord.toString();
    Assert.assertEquals(wantedContent, onlyResult.toString());
    String wantedSchema = inputParentRecord.getSchema().toString();
    Assert.assertEquals(wantedSchema, onlyResult.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 8 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeSimpleFields_m.

/**
 * Input parent record: {@link NormalizeDoFnTest#inputParentRecord}
 *
 * Normalize simple field: `m`
 *
 * The schema of m must change from a list to a simple object. Expected normalized results of the field `m`:
 *
 * [{"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": "m1"},
 *
 * {"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": "m2"},
 *
 * {"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": "m3"}]
 *
 * Every output record is checked against its own expected content and its own schema.
 *
 * @throws Exception
 */
@Test
public void testNormalizeSimpleFields_m() throws Exception {
    NormalizeProperties properties = new NormalizeProperties("test");
    properties.init();
    properties.schemaListener.afterSchema();
    // Normalize `m` simple field
    properties.isList.setValue(false);
    properties.columnToNormalize.setValue("m");

    NormalizeDoFn function = new NormalizeDoFn().withProperties(properties);
    DoFnTester<IndexedRecord, IndexedRecord> fnTester = DoFnTester.of(function);
    List<IndexedRecord> outputs = fnTester.processBundle(inputParentRecord);

    // Expected values of `m`, in the order the output records are asserted
    String[] expectedValuesOfM = { "m1", "m2", "m3" };
    Assert.assertEquals(expectedValuesOfM.length, outputs.size());

    // Same as the input parent schema, except that `m` is a plain string
    Schema expectedParentSchema = SchemaBuilder.record("inputParentRow").fields()
            .name("a").type().optional().stringType()
            .name("b").type(inputSchemaXY).noDefault()
            .name("c").type(inputSchemaFG).noDefault()
            .name("m").type().stringType().noDefault()
            .endRecord();

    for (int i = 0; i < expectedValuesOfM.length; i++) {
        GenericRecord expectedRecord = new GenericRecordBuilder(expectedParentSchema)
                .set("a", "aaa")
                .set("b", inputRecordXY)
                .set("c", inputRecordFG)
                .set("m", expectedValuesOfM[i])
                .build();
        GenericRecord outputRecord = (GenericRecord) outputs.get(i);

        Assert.assertEquals(expectedRecord.toString(), outputRecord.toString());
        // Compare with the schema of this output record; the earlier version compared every
        // expected schema with the schema of the first output only.
        Assert.assertEquals(expectedRecord.getSchema().toString(), outputRecord.getSchema().toString());
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 9 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeSimpleFields_otherSeparator.

/**
 * Normalizes the simple field `b.x` with the custom separator `#`.
 *
 * Input parent record: built inside the test from the shared fixtures, with `b.x` set to "x1#x2".
 *
 * Expected normalized results of the field `b.x`:
 *
 * [{"a": "aaa", "b": {"x": "x1", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception
 */
@Test
public void testNormalizeSimpleFields_otherSeparator() throws Exception {
    // Input: `b.x` carries two values separated by `#`
    GenericRecord hashSeparatedXY = new GenericRecordBuilder(inputSchemaXY)
            .set("x", "x1#x2")
            .set("y", inputRecordDE)
            .build();
    GenericRecord hashSeparatedParent = new GenericRecordBuilder(inputParentSchema)
            .set("a", "aaa")
            .set("b", hashSeparatedXY)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();

    // Configuration: split `b.x` on the custom `#` separator
    NormalizeProperties normalizeProps = new NormalizeProperties("test");
    normalizeProps.init();
    normalizeProps.schemaListener.afterSchema();
    normalizeProps.isList.setValue(false);
    normalizeProps.fieldSeparator.setValue(NormalizeConstant.Delimiter.OTHER);
    normalizeProps.otherSeparator.setValue("#");
    normalizeProps.trim.setValue(true);
    normalizeProps.discardTrailingEmptyStr.setValue(true);
    normalizeProps.columnToNormalize.setValue("b.x");

    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(normalizeProps);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(hashSeparatedParent);
    Assert.assertEquals(2, results.size());

    // Expected: one parent record per value of `b.x`, everything else untouched
    GenericRecord wantedX1Y = new GenericRecordBuilder(inputSchemaXY)
            .set("x", "x1")
            .set("y", inputRecordDE)
            .build();
    GenericRecord wantedX2Y = new GenericRecordBuilder(inputSchemaXY)
            .set("x", "x2")
            .set("y", inputRecordDE)
            .build();
    GenericRecord wantedParentX1 = new GenericRecordBuilder(inputParentSchema)
            .set("a", "aaa")
            .set("b", wantedX1Y)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();
    GenericRecord wantedParentX2 = new GenericRecordBuilder(inputParentSchema)
            .set("a", "aaa")
            .set("b", wantedX2Y)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();

    GenericRecord firstResult = (GenericRecord) results.get(0);
    GenericRecord secondResult = (GenericRecord) results.get(1);

    Assert.assertEquals(wantedParentX1.toString(), firstResult.toString());
    Assert.assertEquals(wantedParentX1.getSchema().toString(), firstResult.getSchema().toString());
    Assert.assertEquals(wantedParentX2.toString(), secondResult.toString());
    Assert.assertEquals(wantedParentX2.getSchema().toString(), secondResult.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 10 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeArrayFields_bydj.

/**
 * Input parent record: {@link NormalizeDoFnTest#inputParentRecord}
 *
 * Normalizes the array field `b.y.d.j`.
 *
 * The schema of `j` must change from a list to a simple object: in the expected schema built below, `j` is a
 * single `inputRowL` record. Expected normalized results of the field `b.y.d.j`:
 *
 * [{"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": {"l": "l1"}, "k": "k1;k2"}, "e": "e"}}, "c": {"f": "f", "g":
 * [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": {"l": "l2"}, "k": "k1;k2"}, "e": "e"}}, "c": {"f": "f", "g":
 * [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception
 */
@Test
public void testNormalizeArrayFields_bydj() throws Exception {
    NormalizeProperties normalizeProps = new NormalizeProperties("test");
    normalizeProps.init();
    normalizeProps.schemaListener.afterSchema();

    // Target the array field `b.y.d.j`
    normalizeProps.isList.setValue(true);
    normalizeProps.columnToNormalize.setValue("b.y.d.j");

    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(normalizeProps);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(2, results.size());

    // Expected schemas, innermost first: `j` is a single record instead of an array
    Schema wantedSchemaL = SchemaBuilder.record("inputRowL").fields()
            .name("l").type().optional().stringType()
            .endRecord();
    Schema wantedSchemaJK = SchemaBuilder.record("inputRowJK").fields()
            .name("j").type(wantedSchemaL).noDefault()
            .name("k").type().optional().stringType()
            .endRecord();
    Schema wantedSchemaDE = SchemaBuilder.record("inputRowDE").fields()
            .name("d").type(wantedSchemaJK).noDefault()
            .name("e").type().optional().stringType()
            .endRecord();
    Schema wantedSchemaXY = SchemaBuilder.record("inputRowXY").fields()
            .name("x").type().optional().stringType()
            .name("y").type(wantedSchemaDE).noDefault()
            .endRecord();
    Schema wantedParentSchema = SchemaBuilder.record("inputParentRow").fields()
            .name("a").type().optional().stringType()
            .name("b").type(wantedSchemaXY).noDefault()
            .name("c").type(inputSchemaFG).noDefault()
            .name("m").type(inputSchemaListM).noDefault()
            .endRecord();

    // Expected first record: `j` holds inputRecordL1
    GenericRecord wantedJ1K = new GenericRecordBuilder(wantedSchemaJK)
            .set("j", inputRecordL1)
            .set("k", "k1;k2")
            .build();
    GenericRecord wantedDE1 = new GenericRecordBuilder(wantedSchemaDE)
            .set("d", wantedJ1K)
            .set("e", "e")
            .build();
    GenericRecord wantedXY1 = new GenericRecordBuilder(wantedSchemaXY)
            .set("x", "x1;x2")
            .set("y", wantedDE1)
            .build();
    GenericRecord wantedParentL1 = new GenericRecordBuilder(wantedParentSchema)
            .set("a", "aaa")
            .set("b", wantedXY1)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();

    // Expected second record: `j` holds inputRecordL2
    GenericRecord wantedJ2K = new GenericRecordBuilder(wantedSchemaJK)
            .set("j", inputRecordL2)
            .set("k", "k1;k2")
            .build();
    GenericRecord wantedDE2 = new GenericRecordBuilder(wantedSchemaDE)
            .set("d", wantedJ2K)
            .set("e", "e")
            .build();
    GenericRecord wantedXY2 = new GenericRecordBuilder(wantedSchemaXY)
            .set("x", "x1;x2")
            .set("y", wantedDE2)
            .build();
    GenericRecord wantedParentL2 = new GenericRecordBuilder(wantedParentSchema)
            .set("a", "aaa")
            .set("b", wantedXY2)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();

    GenericRecord firstResult = (GenericRecord) results.get(0);
    GenericRecord secondResult = (GenericRecord) results.get(1);

    Assert.assertEquals(wantedParentL1.toString(), firstResult.toString());
    Assert.assertEquals(wantedParentL1.getSchema().toString(), firstResult.getSchema().toString());
    Assert.assertEquals(wantedParentL2.toString(), secondResult.toString());
    Assert.assertEquals(wantedParentL2.getSchema().toString(), secondResult.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord)17 Test (org.junit.Test)17 NormalizeProperties (org.talend.components.processing.definition.normalize.NormalizeProperties)17 GenericRecord (org.apache.avro.generic.GenericRecord)13 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)8 Schema (org.apache.avro.Schema)3 AbstractList (java.util.AbstractList)1