Search in sources :

Example 1 with IndexedRecord

use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.

the class AvroIndexedRecordPartitionerTest method testGetPartition_IntegerMinValue.

/**
 * Checks that a key built from {@code MockIndexedRecord(Integer.MIN_VALUE)} is assigned to partition 0
 * when {@link Integer#MAX_VALUE} partitions are available.
 */
@Test
public void testGetPartition_IntegerMinValue() {
    // NOTE(review): presumably the mock's constructor argument is used as its hash code - confirm in MockIndexedRecord.
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(Integer.MIN_VALUE));
    int partition = avroPartitioner.getPartition(key, new AvroValue<Object>(), Integer.MAX_VALUE);
    assertEquals(0, partition);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) AvroKey(org.apache.avro.mapred.AvroKey) AvroValue(org.apache.avro.mapred.AvroValue) Test(org.junit.Test)

Example 2 with IndexedRecord

use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.

the class AvroIndexedRecordPartitionerTest method testGetPartition.

/**
 * Checks the partition chosen for a key built from {@code MockIndexedRecord(3)}:
 * partition 3 out of 5, and partition 1 out of 2.
 */
@Test
public void testGetPartition() {
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(3));

    int partitionOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, partitionOfFive);

    int partitionOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, partitionOfTwo);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) AvroKey(org.apache.avro.mapred.AvroKey) AvroValue(org.apache.avro.mapred.AvroValue) Test(org.junit.Test)

Example 3 with IndexedRecord

use of org.apache.avro.generic.IndexedRecord in project crunch by cloudera.

the class AvroIndexedRecordPartitionerTest method testGetPartition_NegativeHashValue.

/**
 * Checks the partition chosen for a key built from {@code MockIndexedRecord(-3)}:
 * the expected results match the positive case (partition 3 out of 5, partition 1 out of 2).
 */
@Test
public void testGetPartition_NegativeHashValue() {
    // NOTE(review): presumably -3 becomes the record's hash code - confirm in MockIndexedRecord.
    AvroKey<IndexedRecord> key = new AvroKey<IndexedRecord>(new MockIndexedRecord(-3));

    int partitionOfFive = avroPartitioner.getPartition(key, new AvroValue<Object>(), 5);
    assertEquals(3, partitionOfFive);

    int partitionOfTwo = avroPartitioner.getPartition(key, new AvroValue<Object>(), 2);
    assertEquals(1, partitionOfTwo);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) AvroKey(org.apache.avro.mapred.AvroKey) AvroValue(org.apache.avro.mapred.AvroValue) Test(org.junit.Test)

Example 4 with IndexedRecord

use of org.apache.avro.generic.IndexedRecord in project tdi-studio-se by Talend.

the class FlowVariablesWriterTest method testGetSuccessfulWritesSeveralData.

/**
 * Verifies {@link FlowVariablesWriter#getSuccessfulWrites()} when the wrapped writer reports two Root records.
 *
 * <p>
 * The returned {@link Iterable} must contain exactly two elements, equal to the Main records of the two
 * Root records, in the same order. After the call the {@link RuntimeContainer} must hold the flow variable
 * {@code id} of component {@code tComponent_1} with the out-of-band value of the first Root record (123).
 */
@Test
public void testGetSuccessfulWritesSeveralData() throws IOException {
    // Schemas: Main(name: string, age: int), OutOfBand(id: int) and the Root schema combining them.
    Schema mainSchema = SchemaBuilder.record("Main").fields()
            .name("name").type().stringType().noDefault()
            .name("age").type().intType().noDefault()
            .endRecord();
    Schema outOfBandSchema = SchemaBuilder.record("OutOfBand").fields()
            .name("id").type().intType().noDefault()
            .endRecord();
    Schema rootSchema = RootSchemaUtils.createRootSchema(mainSchema, outOfBandSchema);

    // First Root record: its out-of-band id (123) is the value expected in the container afterwards.
    IndexedRecord firstMain = new GenericData.Record(mainSchema);
    firstMain.put(0, "Abraham Lincoln");
    firstMain.put(1, 208);
    IndexedRecord firstOutOfBand = new GenericData.Record(outOfBandSchema);
    firstOutOfBand.put(0, 123);
    IndexedRecord firstRoot = new GenericData.Record(rootSchema);
    firstRoot.put(0, firstMain);
    firstRoot.put(1, firstOutOfBand);

    // Second Root record: carries a different out-of-band id (321).
    IndexedRecord secondMain = new GenericData.Record(mainSchema);
    secondMain.put(0, "George Washington");
    secondMain.put(1, 284);
    IndexedRecord secondOutOfBand = new GenericData.Record(outOfBandSchema);
    secondOutOfBand.put(0, 321);
    IndexedRecord secondRoot = new GenericData.Record(rootSchema);
    secondRoot.put(0, secondMain);
    secondRoot.put(1, secondOutOfBand);

    ArrayList<Object> wrappedOutput = new ArrayList<>();
    wrappedOutput.add(firstRoot);
    wrappedOutput.add(secondRoot);
    when(wrappedWriter.getSuccessfulWrites()).thenReturn(wrappedOutput);

    FlowVariablesWriter<Object> writer = new FlowVariablesWriter<>(wrappedWriter, runtimeContainer);
    Iterator<Object> results = writer.getSuccessfulWrites().iterator();

    // Exactly two elements, each equal to the corresponding Main record.
    assertTrue(results.hasNext());
    Object firstResult = results.next();
    assertEquals(firstMain, firstResult);

    assertTrue(results.hasNext());
    Object secondResult = results.next();
    assertEquals(secondMain, secondResult);

    assertFalse(results.hasNext());

    // The stored flow variable must be the id of the first Root record.
    Object storedId = runtimeContainer.getComponentData("tComponent_1", "id");
    assertNotNull(storedId);
    assertThat(storedId, instanceOf(Integer.class));
    assertEquals(123, storedId);

    writer.close();
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 5 with IndexedRecord

use of org.apache.avro.generic.IndexedRecord in project tdi-studio-se by Talend.

the class FlowVariablesWriter method getRejectedWrites.

/**
 * Retrieves rejected writes from the wrapped {@link WriterWithFeedback}, strips the flow variables
 * (aka Out of band) data from them and stores the flow variables in the {@link RuntimeContainer}
 * when they are present.
 *
 * <p>
 * On the first call that yields at least one element, the method checks whether the first element is a
 * Root record and chooses the processing strategy accordingly. That strategy is reused by later calls.
 * While the wrapped writer returns an empty {@link Iterable}, it is returned as is and the strategy
 * selection is postponed to a later call.
 *
 * <p>
 * Flow variables data is retrieved and stored only from the first element of the incoming {@link Iterable}.
 * It is assumed that the {@link Iterable} contains only 1 element in most cases. Additional elements make
 * no sense for the flow variables mechanism, so they are ignored.
 *
 * <p>
 * It is also assumed that every element in the {@link Iterable} is an instance of the same class (has the
 * same schema if it is an {@link IndexedRecord}). Otherwise errors may appear at runtime.
 *
 * @return {@link Iterable} with rejected data from which the flow variables data has been removed
 * @throws NullPointerException if the wrapped writer returns {@code null} rejected writes
 */
@Override
public Iterable<Object> getRejectedWrites() {
    Iterable rejectedWrites = wrappedWriter.getRejectedWrites();
    if (rejectedWrites == null) {
        throw new NullPointerException("Null rejected writes is not allowed");
    }
    if (firstRejectData) {
        Iterator<?> writesIterator = rejectedWrites.iterator();
        if (!writesIterator.hasNext()) {
            // Nothing to inspect yet: keep the flag set so the strategy is chosen on a later call.
            return rejectedWrites;
        }
        Object firstData = writesIterator.next();
        if (RootRecordUtils.isRootRecord(firstData)) {
            IndexedRecord rootRecord = (IndexedRecord) firstData;
            FlowVariablesProcessor flowVariablesProcessor = new FlowVariablesProcessor(runtimeContainer);
            flowVariablesProcessor.initSchema(rootRecord);
            rejectDataProcessor = flowVariablesProcessor;
        } else {
            rejectDataProcessor = new MainDataProcessor();
        }
        // Clear the reject-side flag. The original cleared firstSuccessData here, so the reject
        // strategy was re-selected on every call while the success-side flag was flipped without
        // its processor being initialized (presumably owned by getSuccessfulWrites() - confirm).
        firstRejectData = false;
    }
    return rejectDataProcessor.processDataIterable(rejectedWrites);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord)

Aggregations

IndexedRecord (org.apache.avro.generic.IndexedRecord)24 Test (org.junit.Test)18 Schema (org.apache.avro.Schema)15 ArrayList (java.util.ArrayList)7 Date (java.util.Date)3 AvroKey (org.apache.avro.mapred.AvroKey)3 AvroValue (org.apache.avro.mapred.AvroValue)3 BigDecimal (java.math.BigDecimal)1 LinkedList (java.util.LinkedList)1 Field (org.apache.avro.Schema.Field)1