Search in sources :

Example 1 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeSimpleFields_bx.

/**
 * Normalizes the simple (non-list) field {@code b.x} of {@link NormalizeDoFnTest#inputParentRecord} and checks
 * that one output record is produced per value, with every other field left as in the input.
 *
 * Expected normalized results of the field `b.x`:
 *
 * [{"a": "aaa", "b": {"x": "x1", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeSimpleFields_bx() throws Exception {
    // Configure the component to normalize the simple field `b.x`.
    NormalizeProperties props = new NormalizeProperties("test");
    props.init();
    props.schemaListener.afterSchema();
    props.isList.setValue(false);
    props.trim.setValue(true);
    props.discardTrailingEmptyStr.setValue(true);
    props.columnToNormalize.setValue("b.x");

    // Run the function on the shared parent record.
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(new NormalizeDoFn().withProperties(props));
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(2, results.size());

    // Output i must equal the parent record with `b.x` replaced by the i-th expected value.
    String[] expectedXValues = { "x1", "x2" };
    for (int i = 0; i < expectedXValues.length; i++) {
        GenericRecord expectedB = new GenericRecordBuilder(inputSchemaXY)
                .set("x", expectedXValues[i])
                .set("y", inputRecordDE)
                .build();
        GenericRecord expectedParent = new GenericRecordBuilder(inputParentSchema)
                .set("a", "aaa")
                .set("b", expectedB)
                .set("c", inputRecordFG)
                .set("m", listInputRecordM)
                .build();

        GenericRecord actual = (GenericRecord) results.get(i);
        // Compare through toString() so that both content and schema are checked textually.
        Assert.assertEquals(expectedParent.toString(), actual.toString());
        Assert.assertEquals(expectedParent.getSchema().toString(), actual.getSchema().toString());
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 2 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeArrayFields_cg.

/**
 * Normalizes the array field {@code c.g} of {@link NormalizeDoFnTest#inputParentRecord}.
 *
 * Normalize array field: `c.g`
 *
 * The schema of g must change from a list to a simple object. Expected normalized results of the field `c.g`:
 *
 * [{"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": {"h": "h1", "i": "i2"}}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": {"h": "h2", "i": "i1"}}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeArrayFields_cg() throws Exception {
    // Configure the component to normalize the array field `c.g`.
    NormalizeProperties props = new NormalizeProperties("test");
    props.init();
    props.schemaListener.afterSchema();
    props.isList.setValue(true);
    props.columnToNormalize.setValue("c.g");

    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(props);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(2, results.size());

    // Expected schemas: `g` is now a single {h, i} record instead of an array of them.
    Schema hiSchema = SchemaBuilder.record("inputRowHI").fields()
            .name("h").type().optional().stringType()
            .name("i").type().optional().stringType()
            .endRecord();
    Schema fgSchema = SchemaBuilder.record("inputRowFG").fields()
            .name("f").type().optional().stringType()
            .name("g").type(hiSchema).noDefault()
            .endRecord();
    Schema parentSchema = SchemaBuilder.record("inputParentRow").fields()
            .name("a").type().optional().stringType()
            .name("b").type(inputSchemaXY).noDefault()
            .name("c").type(fgSchema).noDefault()
            .name("m").type(inputSchemaListM).noDefault()
            .endRecord();

    // First expected output carries the first element of `g`.
    GenericRecord firstFG = new GenericRecordBuilder(fgSchema)
            .set("f", "f")
            .set("g", inputRecordHI1)
            .build();
    GenericRecord firstExpected = new GenericRecordBuilder(parentSchema)
            .set("a", "aaa")
            .set("b", inputRecordXY)
            .set("c", firstFG)
            .set("m", listInputRecordM)
            .build();

    // Second expected output carries the second element of `g`.
    GenericRecord secondFG = new GenericRecordBuilder(fgSchema)
            .set("f", "f")
            .set("g", inputRecordHI2)
            .build();
    GenericRecord secondExpected = new GenericRecordBuilder(parentSchema)
            .set("a", "aaa")
            .set("b", inputRecordXY)
            .set("c", secondFG)
            .set("m", listInputRecordM)
            .build();

    GenericRecord firstActual = (GenericRecord) results.get(0);
    GenericRecord secondActual = (GenericRecord) results.get(1);

    Assert.assertEquals(firstExpected.toString(), firstActual.toString());
    Assert.assertEquals(firstExpected.getSchema().toString(), firstActual.getSchema().toString());
    Assert.assertEquals(secondExpected.toString(), secondActual.toString());
    Assert.assertEquals(secondExpected.getSchema().toString(), secondActual.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 3 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeComplexFields_byd.

/**
 * Asks for the complex (record) field {@code b.y.d} of {@link NormalizeDoFnTest#inputParentRecord} to be
 * normalized.
 *
 * Normalize complex field: `b.y.d`
 *
 * Expected: no change, i.e. a single output record whose content and schema match the input.
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeComplexFields_byd() throws Exception {
    // Only the target column is set; every other property keeps the value it has after init().
    NormalizeProperties props = new NormalizeProperties("test");
    props.init();
    props.schemaListener.afterSchema();
    props.columnToNormalize.setValue("b.y.d");

    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(new NormalizeDoFn().withProperties(props));
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);

    // Exactly one record comes out, textually identical to the input in both content and schema.
    Assert.assertEquals(1, results.size());
    GenericRecord actual = (GenericRecord) results.get(0);
    Assert.assertEquals(inputParentRecord.toString(), actual.toString());
    Assert.assertEquals(inputParentRecord.getSchema().toString(), actual.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 4 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testVariableDuplication.

/**
 * This test will normalize `b.x`. It will create 2 outputs. We then modify the first output and check that the
 * modification has no impact on the second one, i.e. that the two outputs do not share mutable state.
 *
 * Normalize simple field: `b.x`
 *
 * Expected normalized results of the field `b.x`:
 *
 * [{"a": "aaa", "b": {"x": "x1", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * After modification (only `a`, `b.x` and the first `b.y.d.j[].l` of the first output are touched) :
 *
 * [{"a": "MODIFIED_A", "b": {"x": "MODIFIED_X1", "y": {"d": {"j": [{"l": "MODIFIED_L1"}, {"l": "l2"}], "k":
 * "k1;k2"}, "e": "e"}}, "c": {"f": "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2",
 * "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testVariableDuplication() throws Exception {
    NormalizeProperties properties = new NormalizeProperties("test");
    properties.init();
    properties.schemaListener.afterSchema();
    properties.isList.setValue(false);
    properties.trim.setValue(true);
    properties.discardTrailingEmptyStr.setValue(true);
    // Normalize `b.x` simple field
    properties.columnToNormalize.setValue("b.x");

    NormalizeDoFn function = new NormalizeDoFn().withProperties(properties);
    DoFnTester<IndexedRecord, IndexedRecord> fnTester = DoFnTester.of(function);
    List<IndexedRecord> outputs = fnTester.processBundle(inputParentRecord);
    Assert.assertEquals(2, outputs.size());

    GenericRecord expectedRecordX1Y = new GenericRecordBuilder(inputSchemaXY)
            .set("x", "x1")
            .set("y", inputRecordDE)
            .build();
    GenericRecord expectedRecordX2Y = new GenericRecordBuilder(inputSchemaXY)
            .set("x", "x2")
            .set("y", inputRecordDE)
            .build();
    GenericRecord expectedParentRecordX1 = new GenericRecordBuilder(inputParentSchema)
            .set("a", "aaa")
            .set("b", expectedRecordX1Y)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();
    GenericRecord expectedParentRecordX2 = new GenericRecordBuilder(inputParentSchema)
            .set("a", "aaa")
            .set("b", expectedRecordX2Y)
            .set("c", inputRecordFG)
            .set("m", listInputRecordM)
            .build();

    // test initial output
    GenericRecord outputRecord1 = (GenericRecord) outputs.get(0);
    GenericRecord outputRecord2 = (GenericRecord) outputs.get(1);
    Assert.assertEquals(expectedParentRecordX1.toString(), outputRecord1.toString());
    Assert.assertEquals(expectedParentRecordX1.getSchema().toString(), outputRecord1.getSchema().toString());
    Assert.assertEquals(expectedParentRecordX2.toString(), outputRecord2.toString());
    Assert.assertEquals(expectedParentRecordX2.getSchema().toString(), outputRecord2.getSchema().toString());

    // modify outputRecord1
    // Test a simple variable
    outputRecord1.put("a", "MODIFIED_A");
    // Test a hierarchical variable
    GenericRecord b = (GenericRecord) outputRecord1.get("b");
    b.put("x", "MODIFIED_X1");
    // Test a looped variable: walk b -> y -> d -> j one level at a time.
    GenericRecord y = (GenericRecord) b.get("y");
    GenericRecord d = (GenericRecord) y.get("d");
    // Cast to the List interface (not a concrete base class) so the test does not depend on which list
    // implementation backs the array field; the unchecked warning is suppressed on this cast only.
    @SuppressWarnings("unchecked")
    List<GenericRecord> j = (List<GenericRecord>) d.get("j");
    j.get(0).put("l", "MODIFIED_L1");

    // Check that outputRecord1 really changed while outputRecord2 is untouched
    Assert.assertNotEquals(expectedParentRecordX1.toString(), outputRecord1.toString());
    Assert.assertEquals(expectedParentRecordX2.toString(), outputRecord2.toString());
    Assert.assertEquals(expectedParentRecordX2.getSchema().toString(), outputRecord2.getSchema().toString());
}
Also used : AbstractList(java.util.AbstractList) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Example 5 with NormalizeProperties

use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.

the class NormalizeDoFnTest method testNormalizeComplexFields_b.

/**
 * Asks for the complex (record) field {@code b} of {@link NormalizeDoFnTest#inputParentRecord} to be normalized.
 *
 * Normalize complex field: `b`
 *
 * Expected: no change, i.e. a single output record whose content and schema match the input.
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeComplexFields_b() throws Exception {
    // Only the target column is set; every other property keeps the value it has after init().
    NormalizeProperties props = new NormalizeProperties("test");
    props.init();
    props.schemaListener.afterSchema();
    props.columnToNormalize.setValue("b");

    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(new NormalizeDoFn().withProperties(props));
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);

    // Exactly one record comes out, textually identical to the input in both content and schema.
    Assert.assertEquals(1, results.size());
    GenericRecord actual = (GenericRecord) results.get(0);
    Assert.assertEquals(inputParentRecord.toString(), actual.toString());
    Assert.assertEquals(inputParentRecord.getSchema().toString(), actual.getSchema().toString());
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) NormalizeProperties(org.talend.components.processing.definition.normalize.NormalizeProperties) Test(org.junit.Test)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord)17 Test (org.junit.Test)17 NormalizeProperties (org.talend.components.processing.definition.normalize.NormalizeProperties)17 GenericRecord (org.apache.avro.generic.GenericRecord)13 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)8 Schema (org.apache.avro.Schema)3 AbstractList (java.util.AbstractList)1