use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.
the class AvroIndexedRecordPartitionerTest method testGetPartition_IntegerMinValue.
/**
 * Checks that a key built from {@code Integer.MIN_VALUE} is assigned to partition 0 when
 * {@code Integer.MAX_VALUE} partitions are available.
 * NOTE(review): presumably {@code MockIndexedRecord} uses its constructor argument as its hash code - confirm.
 */
@Test
public void testGetPartition_IntegerMinValue() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(Integer.MIN_VALUE));
    AvroValue<Object> value = new AvroValue<Object>();

    int partition = avroPartitioner.getPartition(key, value, Integer.MAX_VALUE);

    assertEquals(0, partition);
}
use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.
the class AvroIndexedRecordPartitionerTest method testGetPartition.
/**
 * Checks the partition chosen for a key built from the value 3: partition 3 out of 5, and partition 1 out of 2.
 */
@Test
public void testGetPartition() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(3));

    int partitionOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, partitionOfFive);

    int partitionOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, partitionOfTwo);
}
use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.
the class AvroIndexedRecordPartitionerTest method testGetPartition_NegativeHashValue.
/**
 * Checks that a key built from the negative value -3 is partitioned like its positive counterpart:
 * partition 3 out of 5, and partition 1 out of 2.
 */
@Test
public void testGetPartition_NegativeHashValue() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(-3));

    int partitionOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, partitionOfFive);

    int partitionOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, partitionOfTwo);
}
use of org.apache.avro.generic.IndexedRecord in project tdi-studio-se by Talend.
the class FlowVariablesWriterTest method testGetSuccessfulWritesSeveralData.
/**
 * Verifies {@link FlowVariablesWriter#getSuccessfulWrites()} when the wrapped writer reports two Root records.
 * The returned {@link Iterable} must yield exactly two elements, each equal to the Main part of the corresponding
 * Root record, and the {@link RuntimeContainer} must afterwards hold the flow variable taken from the first
 * Root record only.
 */
@Test
public void testGetSuccessfulWritesSeveralData() throws IOException {
    // Schemas: Main (name, age), OutOfBand (id) and the Root schema built from the two.
    Schema mainSchema = SchemaBuilder.record("Main").fields()
            .name("name").type().stringType().noDefault()
            .name("age").type().intType().noDefault()
            .endRecord();
    Schema outOfBandSchema = SchemaBuilder.record("OutOfBand").fields()
            .name("id").type().intType().noDefault()
            .endRecord();
    Schema rootSchema = RootSchemaUtils.createRootSchema(mainSchema, outOfBandSchema);

    // First Root record: Main data at index 0, Out of band data at index 1.
    IndexedRecord firstMain = new GenericData.Record(mainSchema);
    firstMain.put(0, "Abraham Lincoln");
    firstMain.put(1, 208);
    IndexedRecord firstOutOfBand = new GenericData.Record(outOfBandSchema);
    firstOutOfBand.put(0, 123);
    IndexedRecord firstRoot = new GenericData.Record(rootSchema);
    firstRoot.put(0, firstMain);
    firstRoot.put(1, firstOutOfBand);

    // Second Root record, same layout with different values.
    IndexedRecord secondMain = new GenericData.Record(mainSchema);
    secondMain.put(0, "George Washington");
    secondMain.put(1, 284);
    IndexedRecord secondOutOfBand = new GenericData.Record(outOfBandSchema);
    secondOutOfBand.put(0, 321);
    IndexedRecord secondRoot = new GenericData.Record(rootSchema);
    secondRoot.put(0, secondMain);
    secondRoot.put(1, secondOutOfBand);

    // The mocked wrapped writer reports both Root records as successful writes.
    ArrayList<Object> wrappedOutput = new ArrayList<>();
    wrappedOutput.add(firstRoot);
    wrappedOutput.add(secondRoot);
    when(wrappedWriter.getSuccessfulWrites()).thenReturn(wrappedOutput);

    FlowVariablesWriter<Object> writer = new FlowVariablesWriter<>(wrappedWriter, runtimeContainer);
    Iterator<Object> unwrapped = writer.getSuccessfulWrites().iterator();

    // Exactly two elements come back, each being the Main record only.
    assertTrue(unwrapped.hasNext());
    assertEquals(firstMain, unwrapped.next());
    assertTrue(unwrapped.hasNext());
    assertEquals(secondMain, unwrapped.next());
    assertFalse(unwrapped.hasNext());

    // The stored flow variable is the id of the first Out of band record (123, not 321).
    Object storedId = runtimeContainer.getComponentData("tComponent_1", "id");
    assertNotNull(storedId);
    assertThat(storedId, instanceOf(Integer.class));
    assertEquals(123, storedId);

    writer.close();
}
use of org.apache.avro.generic.IndexedRecord in project tdi-studio-se by Talend.
the class FlowVariablesWriter method getRejectedWrites.
/**
 * Retrieves rejected writes from the wrapped {@link WriterWithFeedback}. Cleans the writes from flow variables
 * (aka Out of band) data and stores flow variables in the {@link RuntimeContainer} in case they are present.
 *
 * <p>
 * It checks whether the {@link Iterable} content is Root records or not and chooses the appropriate processing
 * strategy during the first call to this method which yields at least one element. While the wrapped writer
 * returns an empty {@link Iterable}, it is passed through untouched and the choice is postponed.
 *
 * <p>
 * Flow variables data is retrieved and stored only from the first element of the incoming {@link Iterable}.
 * It is assumed that the {@link Iterable} contains only 1 element in most cases. More elements make no sense
 * for the flow variables mechanism, so they are ignored.
 *
 * <p>
 * It is also assumed that every element in the {@link Iterable} is an instance of the same class (has the same
 * schema if it is an {@link IndexedRecord}). Otherwise errors could appear at runtime.
 *
 * @return {@link Iterable} with rejected data which is released from flow variables data
 * @throws NullPointerException if the wrapped writer returns {@code null} rejected writes
 */
@Override
public Iterable<Object> getRejectedWrites() {
    Iterable rejectedWrites = wrappedWriter.getRejectedWrites();
    if (rejectedWrites == null) {
        throw new NullPointerException("Null rejected writes is not allowed");
    }
    if (firstRejectData) {
        Iterator<?> writesIterator = rejectedWrites.iterator();
        if (!writesIterator.hasNext()) {
            // Nothing to inspect yet: keep firstRejectData set so a later call can still pick the strategy.
            return rejectedWrites;
        }
        // Only the first element is inspected; the rest are assumed to be of the same kind.
        Object firstData = writesIterator.next();
        if (RootRecordUtils.isRootRecord(firstData)) {
            IndexedRecord rootRecord = (IndexedRecord) firstData;
            rejectDataProcessor = new FlowVariablesProcessor(runtimeContainer);
            ((FlowVariablesProcessor) rejectDataProcessor).initSchema(rootRecord);
        } else {
            rejectDataProcessor = new MainDataProcessor();
        }
        // Clear the reject-side flag. The previous code cleared firstSuccessData here, which left
        // firstRejectData permanently true and changed the flag that belongs to successful writes.
        firstRejectData = false;
    }
    return rejectDataProcessor.processDataIterable(rejectedWrites);
}
Aggregations