Search in sources :

Example 1 with TypeRef

use of com.twitter.elephantbird.util.TypeRef in project elephant-bird by twitter.

the class TestThriftToPig method bytesToTuple.

/**
 * Exercises the {@code ThriftBytesToTuple} UDF: serializes {@code obj} to bytes,
 * wraps those bytes in a single-field input tuple and returns whatever the UDF
 * produces for that input.
 *
 * @param obj the Thrift object to serialize and feed through the UDF
 * @return the result of {@code ThriftBytesToTuple.exec} on the serialized input
 */
static <M extends TBase<?, ?>> Tuple bytesToTuple(M obj) throws TException, ExecException, IOException {
    // Anonymous subclass of TypeRef built from the runtime class of obj.
    TypeRef<M> thriftType = new TypeRef<M>(obj.getClass()) {
    };
    ThriftConverter<M> serializer = ThriftConverter.newInstance(thriftType);
    // The UDF is constructed from the class name rather than from the TypeRef.
    ThriftBytesToTuple<M> udf = new ThriftBytesToTuple<M>(obj.getClass().getName());

    // The UDF input is a one-field tuple whose only field holds the serialized bytes.
    Tuple input = tupleFactory.newTuple(1);
    DataByteArray payload = new DataByteArray(serializer.toBytes(obj));
    input.set(0, payload);
    return udf.exec(input);
}
Also used : TypeRef(com.twitter.elephantbird.util.TypeRef) ThriftBytesToTuple(com.twitter.elephantbird.pig.piggybank.ThriftBytesToTuple) DataByteArray(org.apache.pig.data.DataByteArray) Tuple(org.apache.pig.data.Tuple)

Example 2 with TypeRef

use of com.twitter.elephantbird.util.TypeRef in project elephant-bird by twitter.

the class HiveMultiInputFormat method initialize.

/**
 * Resolves the Thrift class name for the given split and installs a
 * {@code MultiInputFormat} for that class via {@code setInputFormatInstance}.
 *
 * <p>The class name is looked up under {@code Constants.SERIALIZATION_CLASS}, taken from
 * (in order of precedence) the partition's table properties when a Hive plan is set,
 * the HCatalog job info's storer properties, or directly from the job configuration.
 *
 * @param split the file split whose parent path identifies the partition
 * @param job the job configuration carrying the metadata
 * @throws IOException declared by the signature; propagated from the metadata helpers
 * @throws RuntimeException if the partition description cannot be located, the
 *         serialization class property is missing, or the class cannot be loaded
 */
private void initialize(FileSplit split, JobConf job) throws IOException {
    LOG.info("Initializing HiveMultiInputFormat for " + split + " with job " + job);
    String thriftClassName = null;
    Properties properties = null;
    if (!"".equals(HiveConf.getVar(job, HiveConf.ConfVars.PLAN))) {
        // Running as a Hive query. Use MapredWork for metadata.
        Map<String, PartitionDesc> partitionDescMap = Utilities.getMapRedWork(job).getPathToPartitionInfo();
        // Partitions are keyed by the URI of the split's parent directory; compute it once.
        String partitionKey = split.getPath().getParent().toUri().toString();
        if (!partitionDescMap.containsKey(partitionKey)) {
            throw new RuntimeException("Failed locating partition description for " + split.getPath().toUri().toString());
        }
        properties = partitionDescMap.get(partitionKey).getTableDesc().getProperties();
    } else if (job.get(HCatConstants.HCAT_KEY_JOB_INFO, null) != null) {
        // Running as an HCatalog query. Use InputJobInfo for metadata.
        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(job.get(HCatConstants.HCAT_KEY_JOB_INFO));
        properties = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
    } else if (job.get(Constants.SERIALIZATION_CLASS, null) != null) {
        // Running as a Presto query.
        thriftClassName = job.get(Constants.SERIALIZATION_CLASS);
    }
    if (properties != null) {
        // Table/storer properties, when present, are the source of the class name.
        thriftClassName = properties.getProperty(Constants.SERIALIZATION_CLASS);
    }
    if (thriftClassName == null) {
        throw new RuntimeException("Required property " + Constants.SERIALIZATION_CLASS + " is null.");
    }
    try {
        Class thriftClass = job.getClassByName(thriftClassName);
        setInputFormatInstance(new MultiInputFormat(new TypeRef(thriftClass) {
        }));
    } catch (ClassNotFoundException e) {
        // Keep the original exception as the cause so the class-loading failure stays diagnosable.
        throw new RuntimeException("Failed getting class for " + thriftClassName, e);
    }
}
Also used : MultiInputFormat(com.twitter.elephantbird.mapreduce.input.MultiInputFormat) TypeRef(com.twitter.elephantbird.util.TypeRef) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) Properties(java.util.Properties) InputJobInfo(org.apache.hcatalog.mapreduce.InputJobInfo)

Example 3 with TypeRef

use of com.twitter.elephantbird.util.TypeRef in project elephant-bird by twitter.

the class TestThriftToPig method thriftToPig.

/**
 * Converts a Thrift object into a Pig tuple and, as a side check, verifies that a
 * projected tuple built from a subset of fields agrees with the full tuple.
 *
 * <p>The projected subset consists of the fields whose name has an even
 * {@code hashCode()}. For each such field, values that are not the same reference
 * are compared by their {@code toString()} form.
 *
 * @param obj the Thrift object to convert
 * @return the full (unprojected) Pig tuple for {@code obj}
 * @throws TException wrapping any {@code ExecException} raised during the projection check
 */
static <M extends TBase<?, ?>> Tuple thriftToPig(M obj) throws TException {
    // Building a fresh ThriftToPig per object is wasteful, but acceptable for unit tests.
    TypeRef<M> thriftType = new TypeRef<M>(obj.getClass()) {
    };
    ThriftToPig<M> converter = ThriftToPig.newInstance(thriftType);
    Tuple fullTuple = converter.getPigTuple(obj);

    // Select the fields to project, remembering their positions in the full tuple.
    List<Field> structFields = converter.getTStructDescriptor().getFields();
    List<Integer> selectedIndexes = Lists.newArrayList();
    RequiredFieldList projection = new RequiredFieldList();
    for (int fieldIdx = 0; fieldIdx < structFields.size(); fieldIdx++) {
        String fieldName = structFields.get(fieldIdx).getName();
        if (fieldName.hashCode() % 2 != 0) {
            continue;
        }
        RequiredField required = new RequiredField();
        required.setAlias(fieldName);
        required.setIndex(fieldIdx);
        projection.add(required);
        selectedIndexes.add(fieldIdx);
    }

    try {
        Tuple projected = new ProjectedThriftTupleFactory<M>(thriftType, projection).newTuple(obj);
        // Position n of the projected tuple corresponds to the n-th selected full-tuple index.
        for (int pos = 0; pos < selectedIndexes.size(); pos++) {
            int fullIdx = selectedIndexes.get(pos);
            if (fullTuple.get(fullIdx) == projected.get(pos)) {
                // Same reference (this includes both being null): nothing to compare.
                continue;
            }
            assertEquals(fullTuple.get(fullIdx).toString(), projected.get(pos).toString());
        }
    } catch (ExecException e) {
        // Not expected; surface it through the declared exception type.
        throw new TException(e);
    }

    // Callers get the full tuple; the projection was only a consistency check.
    return fullTuple;
}
Also used : TException(org.apache.thrift.TException) TypeRef(com.twitter.elephantbird.util.TypeRef) ExecException(org.apache.pig.backend.executionengine.ExecException) Field(com.twitter.elephantbird.thrift.TStructDescriptor.Field) RequiredField(org.apache.pig.LoadPushDown.RequiredField) RequiredFieldList(org.apache.pig.LoadPushDown.RequiredFieldList) RequiredField(org.apache.pig.LoadPushDown.RequiredField) ThriftBytesToTuple(com.twitter.elephantbird.pig.piggybank.ThriftBytesToTuple) Tuple(org.apache.pig.data.Tuple)

Example 4 with TypeRef

use of com.twitter.elephantbird.util.TypeRef in project elephant-bird by twitter.

the class TestProtobufWritable method setUp.

/**
 * Builds the reference address book (two persons) and its {@code ProtobufWritable}
 * wrapper, storing them in {@code referenceAb} and {@code referenceAbWritable}.
 */
@BeforeClass
public static void setUp() {
    // First person: one mobile and one home phone number.
    PhoneNumber firstMobile = PhoneNumber.newBuilder()
        .setType(PhoneType.MOBILE)
        .setNumber("2930423")
        .build();
    PhoneNumber firstHome = PhoneNumber.newBuilder()
        .setType(PhoneType.HOME)
        .setNumber("214121")
        .build();
    Person first = Person.newBuilder()
        .setEmail("email1@example.com")
        .setId(74)
        .setName("Example Person")
        .addPhone(firstMobile)
        .addPhone(firstHome)
        .build();

    // Second person: a single mobile phone number.
    PhoneNumber secondMobile = PhoneNumber.newBuilder()
        .setType(PhoneType.MOBILE)
        .setNumber("030303")
        .build();
    Person second = Person.newBuilder()
        .setEmail("email2@example.com")
        .setId(7334)
        .setName("Another person")
        .addPhone(secondMobile)
        .build();

    referenceAb = AddressBook.newBuilder()
        .addPerson(first)
        .addPerson(second)
        .build();
    TypeRef<AddressBook> addressBookType = new TypeRef<AddressBook>() {
    };
    referenceAbWritable = new ProtobufWritable<AddressBook>(referenceAb, addressBookType);
}
Also used : AddressBook(com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook) TypeRef(com.twitter.elephantbird.util.TypeRef) Person(com.twitter.data.proto.tutorial.AddressBookProtos.Person) BeforeClass(org.junit.BeforeClass)

Aggregations

TypeRef (com.twitter.elephantbird.util.TypeRef): 4, ThriftBytesToTuple (com.twitter.elephantbird.pig.piggybank.ThriftBytesToTuple): 2, Tuple (org.apache.pig.data.Tuple): 2, AddressBook (com.twitter.data.proto.tutorial.AddressBookProtos.AddressBook): 1, Person (com.twitter.data.proto.tutorial.AddressBookProtos.Person): 1, MultiInputFormat (com.twitter.elephantbird.mapreduce.input.MultiInputFormat): 1, Field (com.twitter.elephantbird.thrift.TStructDescriptor.Field): 1, Properties (java.util.Properties): 1, PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 1, InputJobInfo (org.apache.hcatalog.mapreduce.InputJobInfo): 1, RequiredField (org.apache.pig.LoadPushDown.RequiredField): 1, RequiredFieldList (org.apache.pig.LoadPushDown.RequiredFieldList): 1, ExecException (org.apache.pig.backend.executionengine.ExecException): 1, DataByteArray (org.apache.pig.data.DataByteArray): 1, TException (org.apache.thrift.TException): 1, BeforeClass (org.junit.BeforeClass): 1