use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.
the class NormalizeDoFnTest method testNormalizeSimpleFields_bx.
/**
 * Normalizes the simple field `b.x` of {@link NormalizeDoFnTest#inputParentRecord}.
 *
 * The component is configured with `isList` disabled and with `trim` and `discardTrailingEmptyStr` enabled. Two
 * output records are expected. Both keep the parent schema and they differ from each other only by the value of
 * `b.x`:
 *
 * [{"a": "aaa", "b": {"x": "x1", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeSimpleFields_bx() throws Exception {
    // Configure the component to normalize the simple field `b.x`.
    NormalizeProperties config = new NormalizeProperties("test");
    config.init();
    config.schemaListener.afterSchema();
    config.isList.setValue(false);
    config.trim.setValue(true);
    config.discardTrailingEmptyStr.setValue(true);
    config.columnToNormalize.setValue("b.x");

    // Run the function on the single parent record.
    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(config);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(2, results.size());

    // Expected `b` sub-records: identical to the input except for the value of `x`.
    GenericRecordBuilder firstB = new GenericRecordBuilder(inputSchemaXY);
    firstB.set("x", "x1");
    firstB.set("y", inputRecordDE);
    GenericRecord expectedFirstB = firstB.build();

    GenericRecordBuilder secondB = new GenericRecordBuilder(inputSchemaXY);
    secondB.set("x", "x2");
    secondB.set("y", inputRecordDE);
    GenericRecord expectedSecondB = secondB.build();

    // Expected parent records wrapping each `b` sub-record.
    GenericRecordBuilder firstParent = new GenericRecordBuilder(inputParentSchema);
    firstParent.set("a", "aaa");
    firstParent.set("b", expectedFirstB);
    firstParent.set("c", inputRecordFG);
    firstParent.set("m", listInputRecordM);
    GenericRecord expectedFirst = firstParent.build();

    GenericRecordBuilder secondParent = new GenericRecordBuilder(inputParentSchema);
    secondParent.set("a", "aaa");
    secondParent.set("b", expectedSecondB);
    secondParent.set("c", inputRecordFG);
    secondParent.set("m", listInputRecordM);
    GenericRecord expectedSecond = secondParent.build();

    // Compare content and schema through their string representations.
    GenericRecord actualFirst = (GenericRecord) results.get(0);
    GenericRecord actualSecond = (GenericRecord) results.get(1);
    Assert.assertEquals(expectedFirst.toString(), actualFirst.toString());
    Assert.assertEquals(expectedFirst.getSchema().toString(), actualFirst.getSchema().toString());
    Assert.assertEquals(expectedSecond.toString(), actualSecond.toString());
    Assert.assertEquals(expectedSecond.getSchema().toString(), actualSecond.getSchema().toString());
}
use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.
the class NormalizeDoFnTest method testNormalizeArrayFields_cg.
/**
 * Normalizes the array field `c.g` of {@link NormalizeDoFnTest#inputParentRecord}.
 *
 * The component is configured with `isList` enabled. Two output records are expected, one per element of `c.g`.
 * In the output schema, `g` is no longer a list but a single `inputRowHI` record:
 *
 * [{"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": {"h": "h1", "i": "i2"}}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x1;x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c":
 * {"f": "f", "g": {"h": "h2", "i": "i1"}}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeArrayFields_cg() throws Exception {
    // Configure the component to normalize the array field `c.g`.
    NormalizeProperties config = new NormalizeProperties("test");
    config.init();
    config.schemaListener.afterSchema();
    config.isList.setValue(true);
    config.columnToNormalize.setValue("c.g");

    // Run the function on the single parent record.
    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(config);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(2, results.size());

    // Expected schemas, built bottom-up: `g` is typed as one `inputRowHI` record rather than a list.
    Schema schemaHI = SchemaBuilder.record("inputRowHI") //
            .fields() //
            .name("h").type().optional().stringType() //
            .name("i").type().optional().stringType() //
            .endRecord();
    Schema schemaFG = SchemaBuilder.record("inputRowFG") //
            .fields() //
            .name("f").type().optional().stringType() //
            .name("g").type(schemaHI).noDefault() //
            .endRecord();
    Schema parentSchema = SchemaBuilder.record("inputParentRow") //
            .fields() //
            .name("a").type().optional().stringType() //
            .name("b").type(inputSchemaXY).noDefault() //
            .name("c").type(schemaFG).noDefault() //
            .name("m").type(inputSchemaListM).noDefault() //
            .endRecord();

    // Expected first record: `c.g` holds the first element of the original list.
    GenericRecordBuilder firstC = new GenericRecordBuilder(schemaFG);
    firstC.set("f", "f");
    firstC.set("g", inputRecordHI1);
    GenericRecord expectedFirstC = firstC.build();

    GenericRecordBuilder firstParent = new GenericRecordBuilder(parentSchema);
    firstParent.set("a", "aaa");
    firstParent.set("b", inputRecordXY);
    firstParent.set("c", expectedFirstC);
    firstParent.set("m", listInputRecordM);
    GenericRecord expectedFirst = firstParent.build();

    // Expected second record: `c.g` holds the second element of the original list.
    GenericRecordBuilder secondC = new GenericRecordBuilder(schemaFG);
    secondC.set("f", "f");
    secondC.set("g", inputRecordHI2);
    GenericRecord expectedSecondC = secondC.build();

    GenericRecordBuilder secondParent = new GenericRecordBuilder(parentSchema);
    secondParent.set("a", "aaa");
    secondParent.set("b", inputRecordXY);
    secondParent.set("c", expectedSecondC);
    secondParent.set("m", listInputRecordM);
    GenericRecord expectedSecond = secondParent.build();

    // Compare content and schema through their string representations.
    GenericRecord actualFirst = (GenericRecord) results.get(0);
    GenericRecord actualSecond = (GenericRecord) results.get(1);
    Assert.assertEquals(expectedFirst.toString(), actualFirst.toString());
    Assert.assertEquals(expectedFirst.getSchema().toString(), actualFirst.getSchema().toString());
    Assert.assertEquals(expectedSecond.toString(), actualSecond.toString());
    Assert.assertEquals(expectedSecond.getSchema().toString(), actualSecond.getSchema().toString());
}
use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.
the class NormalizeDoFnTest method testNormalizeComplexFields_byd.
/**
 * Normalizes the complex (record) field `b.y.d` of {@link NormalizeDoFnTest#inputParentRecord}.
 *
 * Only `columnToNormalize` is set; every other property keeps the value it has after initialization.
 *
 * Expected: a single output record whose content and schema are the same as the input's.
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeComplexFields_byd() throws Exception {
    // Point the component at the complex field `b.y.d`.
    NormalizeProperties config = new NormalizeProperties("test");
    config.init();
    config.schemaListener.afterSchema();
    config.columnToNormalize.setValue("b.y.d");

    // Run the function on the single parent record.
    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(config);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(1, results.size());

    GenericRecord actual = (GenericRecord) results.get(0);

    // The input is read only after processing, so the comparison uses its current state.
    String expectedContent = inputParentRecord.toString();
    Assert.assertEquals(expectedContent, actual.toString());

    String expectedSchema = inputParentRecord.getSchema().toString();
    Assert.assertEquals(expectedSchema, actual.getSchema().toString());
}
use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.
the class NormalizeDoFnTest method testVariableDuplication.
/**
 * Checks that the output records produced by one normalization do not share mutable state.
 *
 * The test normalizes the simple field `b.x`, which creates 2 outputs, then modifies the first output and verifies
 * that the second one is left untouched.
 *
 * Expected normalized results of the field `b.x`:
 *
 * [{"a": "aaa", "b": {"x": "x1", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * The first output is modified in three places: the top-level field `a`, the nested field `b.x` and the first
 * element of the list `b.y.d.j`. The list `m` is not modified by this test.
 *
 * After modification : [{"a": "MODIFIED_A", "b": {"x": "MODIFIED_X1", "y": {"d": {"j": [{"l": "MODIFIED_L1"}, {"l":
 * "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f": "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m":
 * ["m1", "m2", "m3"]},
 *
 * {"a": "aaa", "b": {"x": "x2", "y": {"d": {"j": [{"l": "l1"}, {"l": "l2"}], "k": "k1;k2"}, "e": "e"}}, "c": {"f":
 * "f", "g": [{"h": "h1", "i": "i2"}, {"h": "h2", "i": "i1"}]}, "m": ["m1", "m2", "m3"]}]
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@SuppressWarnings("unchecked")
@Test
public void testVariableDuplication() throws Exception {
    NormalizeProperties properties = new NormalizeProperties("test");
    properties.init();
    properties.schemaListener.afterSchema();
    properties.isList.setValue(false);
    properties.trim.setValue(true);
    properties.discardTrailingEmptyStr.setValue(true);
    // Normalize `b.x` simple field
    properties.columnToNormalize.setValue("b.x");

    NormalizeDoFn function = new NormalizeDoFn().withProperties(properties);
    DoFnTester<IndexedRecord, IndexedRecord> fnTester = DoFnTester.of(function);
    List<IndexedRecord> outputs = fnTester.processBundle(inputParentRecord);
    Assert.assertEquals(2, outputs.size());

    GenericRecord expectedRecordX1Y = new GenericRecordBuilder(inputSchemaXY) //
            .set("x", "x1") //
            .set("y", inputRecordDE) //
            .build();
    GenericRecord expectedRecordX2Y = new GenericRecordBuilder(inputSchemaXY) //
            .set("x", "x2") //
            .set("y", inputRecordDE) //
            .build();
    GenericRecord expectedParentRecordX1 = new GenericRecordBuilder(inputParentSchema) //
            .set("a", "aaa") //
            .set("b", expectedRecordX1Y) //
            .set("c", inputRecordFG) //
            .set("m", listInputRecordM) //
            .build();
    GenericRecord expectedParentRecordX2 = new GenericRecordBuilder(inputParentSchema) //
            .set("a", "aaa") //
            .set("b", expectedRecordX2Y) //
            .set("c", inputRecordFG) //
            .set("m", listInputRecordM) //
            .build();

    // test initial output
    GenericRecord outputRecord1 = (GenericRecord) outputs.get(0);
    GenericRecord outputRecord2 = (GenericRecord) outputs.get(1);
    Assert.assertEquals(expectedParentRecordX1.toString(), outputRecord1.toString());
    Assert.assertEquals(expectedParentRecordX1.getSchema().toString(), outputRecord1.getSchema().toString());
    Assert.assertEquals(expectedParentRecordX2.toString(), outputRecord2.toString());
    Assert.assertEquals(expectedParentRecordX2.getSchema().toString(), outputRecord2.getSchema().toString());

    // Snapshot the second output BEFORE touching the first one. The expected records above are built from the
    // same nested fixture instances (e.g. inputRecordDE); if the outputs aliased those instances, a leaked change
    // would alter the expected record as well and the comparison against it would still succeed. A plain string
    // captured up front cannot be affected by later mutations.
    String outputRecord2BeforeChange = outputRecord2.toString();

    // modify outputRecord1
    // Test a simple variable
    outputRecord1.put("a", "MODIFIED_A");
    // Test a hierarchical variable
    GenericRecord b = (GenericRecord) outputRecord1.get("b");
    b.put("x", "MODIFIED_X1");
    // Test a looped variable
    GenericRecord y = (GenericRecord) b.get("y");
    GenericRecord d = (GenericRecord) y.get("d");
    List<GenericRecord> j = (List<GenericRecord>) d.get("j");
    j.get(0).put("l", "MODIFIED_L1");

    // Check outputRecord1 really changed
    Assert.assertNotEquals(expectedParentRecordX1.toString(), outputRecord1.toString());
    // Check outputRecord2 is untouched, both against its own snapshot and against the expected record
    Assert.assertEquals(outputRecord2BeforeChange, outputRecord2.toString());
    Assert.assertEquals(expectedParentRecordX2.toString(), outputRecord2.toString());
    Assert.assertEquals(expectedParentRecordX2.getSchema().toString(), outputRecord2.getSchema().toString());
}
use of org.talend.components.processing.definition.normalize.NormalizeProperties in project components by Talend.
the class NormalizeDoFnTest method testNormalizeComplexFields_b.
/**
 * Normalizes the complex (record) field `b` of {@link NormalizeDoFnTest#inputParentRecord}.
 *
 * Only `columnToNormalize` is set; every other property keeps the value it has after initialization.
 *
 * Expected: a single output record whose content and schema are the same as the input's.
 *
 * @throws Exception if the function under test fails while processing the bundle
 */
@Test
public void testNormalizeComplexFields_b() throws Exception {
    // Point the component at the complex field `b`.
    NormalizeProperties config = new NormalizeProperties("test");
    config.init();
    config.schemaListener.afterSchema();
    config.columnToNormalize.setValue("b");

    // Run the function on the single parent record.
    NormalizeDoFn normalizeFn = new NormalizeDoFn().withProperties(config);
    DoFnTester<IndexedRecord, IndexedRecord> tester = DoFnTester.of(normalizeFn);
    List<IndexedRecord> results = tester.processBundle(inputParentRecord);
    Assert.assertEquals(1, results.size());

    GenericRecord actual = (GenericRecord) results.get(0);

    // The input is read only after processing, so the comparison uses its current state.
    String expectedContent = inputParentRecord.toString();
    Assert.assertEquals(expectedContent, actual.toString());

    String expectedSchema = inputParentRecord.getSchema().toString();
    Assert.assertEquals(expectedSchema, actual.getSchema().toString());
}
Aggregations