Search in sources :

Example 1 with ThriftWritable

use of com.twitter.elephantbird.mapreduce.io.ThriftWritable in project elephant-bird by twitter.

The setUp method of the class TestThriftMultiFormatLoader.

@Before
public void setUp() throws Exception {
    Configuration conf = new Configuration();
    // Skip the whole fixture when the native LZO codecs are not available.
    Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));
    pigServer = PigTestUtil.makePigServer();
    inputDir.mkdirs();
    // First input file: the records serialized as an LZO block file.
    ThriftBlockWriter<TestPerson> blockWriter =
            new ThriftBlockWriter<TestPerson>(createLzoOut("1-block.lzo", conf), TestPerson.class);
    for (TestPerson person : records) {
        blockWriter.write(person);
    }
    blockWriter.close();
    // Second input file: the same records as base64-encoded thrift lines.
    LzoBinaryB64LineRecordWriter<TestPerson, ThriftWritable<TestPerson>> lineWriter =
            LzoBinaryB64LineRecordWriter.newThriftWriter(TestPerson.class, createLzoOut("2-b64.lzo", conf));
    for (TestPerson person : records) {
        // thriftWritable is a reusable wrapper field declared on the test class.
        thriftWritable.set(person);
        lineWriter.write(null, thriftWritable);
    }
    lineWriter.close(null);
}
Also used : ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) Configuration(org.apache.hadoop.conf.Configuration) TestPerson(com.twitter.elephantbird.thrift.test.TestPerson) ThriftBlockWriter(com.twitter.elephantbird.mapreduce.io.ThriftBlockWriter) Before(org.junit.Before)

Example 2 with ThriftWritable

use of com.twitter.elephantbird.mapreduce.io.ThriftWritable in project druid by druid-io.

The parseBatch method of the class ThriftInputRowParser.

/**
 * Parses a single thrift-encoded input object into Druid input rows.
 *
 * <p>Accepts a {@link ByteBuffer} (realtime stream), a {@link BytesWritable}
 * (sequence file), or a {@link ThriftWritable} (LZO block thrift file); any
 * other type is rejected with an {@link IAE}. The thrift object is converted
 * to simple JSON and then parsed with the configured parseSpec.
 *
 * @param input the raw record to parse; see supported types above
 * @return a single-element list containing the parsed row
 * @throws IAE if the thrift class cannot be loaded, the input type is
 *         unsupported, or deserialization fails
 */
@Override
public List<InputRow> parseBatch(Object input) {
    if (parser == null) {
        // parser should be created when it is really used to avoid unnecessary
        // initialization of the underlying parseSpec.
        parser = parseSpec.makeParser();
    }
    // Resolving the thrift class in the constructor would throw
    // ClassNotFoundException, so it is loaded lazily here instead.
    try {
        if (thriftClass == null) {
            thriftClass = getThriftClass();
        }
    } catch (IOException e) {
        throw new IAE(e, "failed to load jar [%s]", jarPath);
    } catch (ClassNotFoundException e) {
        throw new IAE(e, "class [%s] not found in jar", thriftClassName);
    } catch (InstantiationException | IllegalAccessException e) {
        throw new IAE(e, "instantiation thrift instance failed");
    }
    final String json;
    try {
        if (input instanceof ByteBuffer) {
            // realtime stream
            // NOTE(review): array() ignores the buffer's position/limit and assumes a
            // zero-offset backing array — confirm upstream always hands in such buffers.
            final byte[] bytes = ((ByteBuffer) input).array();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof BytesWritable) {
            // sequence file
            final byte[] bytes = ((BytesWritable) input).getBytes();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof ThriftWritable) {
            // LzoBlockThrift file
            TBase o = (TBase) ((ThriftWritable) input).get();
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else {
            throw new IAE("unsupported input class of [%s]", input.getClass());
        }
    } catch (IllegalAccessException | InstantiationException | TException e) {
        // Preserve the underlying cause so the failure is diagnosable.
        throw new IAE(e, "failed to deserialize thrift input of class [%s]", input.getClass());
    }
    Map<String, Object> record = parser.parseToMap(json);
    final List<String> dimensions;
    if (!this.dimensions.isEmpty()) {
        dimensions = this.dimensions;
    } else {
        // No explicit dimensions configured: use every key that is not excluded.
        dimensions = Lists.newArrayList(Sets.difference(record.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions()));
    }
    return ImmutableList.of(new MapBasedInputRow(parseSpec.getTimestampSpec().extractTimestamp(record), dimensions, record));
}
Also used : TException(org.apache.thrift.TException) ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer) TBase(org.apache.thrift.TBase) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow)

Example 3 with ThriftWritable

use of com.twitter.elephantbird.mapreduce.io.ThriftWritable in project druid by druid-io.

The parse method of the class ThriftInputRowParser.

/**
 * Parses a single thrift-encoded input object into one Druid input row.
 *
 * <p>Accepts a {@link ByteBuffer} (realtime stream), a {@link BytesWritable}
 * (sequence file), or a {@link ThriftWritable} (LZO block thrift file); any
 * other type is rejected with an {@link IAE}. The thrift object is converted
 * to simple JSON and then parsed with the configured parseSpec.
 *
 * @param input the raw record to parse; see supported types above
 * @return the parsed row
 * @throws IAE if the thrift class cannot be loaded, the input type is
 *         unsupported, or deserialization fails
 */
@Override
public InputRow parse(Object input) {
    if (parser == null) {
        // Lazily create the parser (matches parseBatch) so that calling parse
        // first does not hit a null parser, and construction stays cheap.
        parser = parseSpec.makeParser();
    }
    // Resolving the thrift class in the constructor would throw
    // ClassNotFoundException, so it is loaded lazily here instead.
    try {
        if (thriftClass == null) {
            thriftClass = getThriftClass();
        }
    } catch (IOException e) {
        throw new IAE(e, "failed to load jar [%s]", jarPath);
    } catch (ClassNotFoundException e) {
        throw new IAE(e, "class [%s] not found in jar", thriftClassName);
    } catch (InstantiationException | IllegalAccessException e) {
        throw new IAE(e, "instantiation thrift instance failed");
    }
    final String json;
    try {
        if (input instanceof ByteBuffer) {
            // realtime stream
            // NOTE(review): array() ignores the buffer's position/limit and assumes a
            // zero-offset backing array — confirm upstream always hands in such buffers.
            final byte[] bytes = ((ByteBuffer) input).array();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof BytesWritable) {
            // sequence file
            final byte[] bytes = ((BytesWritable) input).getBytes();
            TBase o = thriftClass.newInstance();
            ThriftDeserialization.detectAndDeserialize(bytes, o);
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else if (input instanceof ThriftWritable) {
            // LzoBlockThrift file
            TBase o = (TBase) ((ThriftWritable) input).get();
            json = ThriftDeserialization.SERIALIZER_SIMPLE_JSON.get().toString(o);
        } else {
            throw new IAE("unsupported input class of [%s]", input.getClass());
        }
    } catch (IllegalAccessException | InstantiationException | TException e) {
        // Preserve the underlying cause so the failure is diagnosable.
        throw new IAE(e, "failed to deserialize thrift input of class [%s]", input.getClass());
    }
    Map<String, Object> record = parser.parse(json);
    return new MapBasedInputRow(parseSpec.getTimestampSpec().extractTimestamp(record), parseSpec.getDimensionsSpec().getDimensionNames(), record);
}
Also used : TException(org.apache.thrift.TException) ThriftWritable(com.twitter.elephantbird.mapreduce.io.ThriftWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) IAE(com.metamx.common.IAE) ByteBuffer(java.nio.ByteBuffer) TBase(org.apache.thrift.TBase) MapBasedInputRow(io.druid.data.input.MapBasedInputRow)

Aggregations

ThriftWritable (com.twitter.elephantbird.mapreduce.io.ThriftWritable)3 IOException (java.io.IOException)2 ByteBuffer (java.nio.ByteBuffer)2 BytesWritable (org.apache.hadoop.io.BytesWritable)2 TBase (org.apache.thrift.TBase)2 TException (org.apache.thrift.TException)2 IAE (com.metamx.common.IAE)1 ThriftBlockWriter (com.twitter.elephantbird.mapreduce.io.ThriftBlockWriter)1 TestPerson (com.twitter.elephantbird.thrift.test.TestPerson)1 MapBasedInputRow (io.druid.data.input.MapBasedInputRow)1 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)1 IAE (org.apache.druid.java.util.common.IAE)1 Configuration (org.apache.hadoop.conf.Configuration)1 Before (org.junit.Before)1