Example 46 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The CreateSequenceFile class, main method:

public static void main(String[] args) throws Exception {
    // Read parameters
    int lines = 10;
    List<String> extraArgs = new ArrayList<String>();
    for (int ai = 0; ai < args.length; ai++) {
        if (args[ai].equals("-line") && ai + 1 < args.length) {
            lines = Integer.parseInt(args[ai + 1]);
            ai++;
        } else {
            extraArgs.add(args[ai]);
        }
    }
    if (extraArgs.size() != 1) {
        usage();
    }
    JobConf conf = new JobConf(CreateSequenceFile.class);
    ThriftSerializer serializer = new ThriftSerializer();
    // Open files
    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, new Path(extraArgs.get(0)), BytesWritable.class, BytesWritable.class);
    // write to file
    BytesWritable key = new BytesWritable();
    Random rand = new Random(20081215);
    for (int i = 0; i < lines; i++) {
        ArrayList<Integer> alist = new ArrayList<Integer>();
        alist.add(i);
        alist.add(i * 2);
        alist.add(i * 3);
        ArrayList<String> slist = new ArrayList<String>();
        slist.add("" + i * 10);
        slist.add("" + i * 100);
        slist.add("" + i * 1000);
        ArrayList<IntString> islist = new ArrayList<IntString>();
        islist.add(new IntString(i * i, "" + i * i * i, i));
        HashMap<String, String> hash = new HashMap<String, String>();
        hash.put("key_" + i, "value_" + i);
        Map<String, Map<String, Map<String, PropValueUnion>>> unionMap = new HashMap<String, Map<String, Map<String, PropValueUnion>>>();
        Map<String, Map<String, PropValueUnion>> erMap = new HashMap<String, Map<String, PropValueUnion>>();
        Map<String, PropValueUnion> attrMap = new HashMap<String, PropValueUnion>();
        erMap.put("erVal" + i, attrMap);
        attrMap.put("value_" + i, PropValueUnion.doubleValue(1.0));
        unionMap.put("key_" + i, erMap);
        Complex complex = new Complex(rand.nextInt(), "record_" + (new Integer(i)).toString(), alist, slist, islist, hash, unionMap, PropValueUnion.stringValue("test" + i), PropValueUnion.unionMStringString(hash), PropValueUnion.lString(slist));
        Writable value = serializer.serialize(complex);
        writer.append(key, value);
    }
    // Add an all-null record
    Complex complex = new Complex(0, null, null, null, null, null, null, null, null, null);
    Writable value = serializer.serialize(complex);
    writer.append(key, value);
    // Close files
    writer.close();
}
Also used : HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), PropValueUnion(org.apache.hadoop.hive.serde2.thrift.test.PropValueUnion), Writable(org.apache.hadoop.io.Writable), BytesWritable(org.apache.hadoop.io.BytesWritable), IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString), Complex(org.apache.hadoop.hive.serde2.thrift.test.Complex), SequenceFile(org.apache.hadoop.io.SequenceFile), Random(java.util.Random), JobConf(org.apache.hadoop.mapred.JobConf), Path(org.apache.hadoop.fs.Path), Map(java.util.Map)
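
The writer above only produces the file; below is a minimal sketch of the reading side, assuming the same BytesWritable key/value layout and the classic SequenceFile.Reader constructor that matches the writer used in the example (the path handling and output are illustrative, not part of the Hive source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadSequenceFileSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical input path; in the example above it is extraArgs.get(0)
        Path path = new Path(args[0]);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            BytesWritable key = new BytesWritable();
            BytesWritable value = new BytesWritable();
            // next() fills key and value in place and returns false at end of file
            while (reader.next(key, value)) {
                // Only the first getLength() bytes of getBytes() are valid payload
                System.out.println("record with " + value.getLength() + " value bytes");
            }
        } finally {
            reader.close();
        }
    }
}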

Example 47 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The GenericUDFLength class, evaluate method:

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    byte[] data = null;
    if (isInputString) {
        String val = null;
        if (arguments[0] != null) {
            val = (String) stringConverter.convert(arguments[0].get());
        }
        if (val == null) {
            return null;
        }
        data = val.getBytes();
        int len = 0;
        for (int i = 0; i < data.length; i++) {
            if (GenericUDFUtils.isUtfStartByte(data[i])) {
                len++;
            }
        }
        result.set(len);
        return result;
    } else {
        BytesWritable val = null;
        if (arguments[0] != null) {
            val = (BytesWritable) binaryConverter.convert(arguments[0].get());
        }
        if (val == null) {
            return null;
        }
        result.set(val.getLength());
        return result;
    }
}
Also used : BytesWritable(org.apache.hadoop.io.BytesWritable)
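
The character count above relies on the fact that in UTF-8 only continuation bytes match the bit pattern 10xxxxxx, so counting the non-continuation bytes yields the number of code points. A minimal standalone sketch of the same idea (it re-implements the start-byte check instead of calling Hive's GenericUDFUtils.isUtfStartByte):

public class Utf8LengthSketch {

    // A byte starts a UTF-8 code point unless it matches 10xxxxxx
    static boolean isUtfStartByte(byte b) {
        return (b & 0xC0) != 0x80;
    }

    static int codePointCount(byte[] utf8) {
        int len = 0;
        for (byte b : utf8) {
            if (isUtfStartByte(b)) {
                len++;
            }
        }
        return len;
    }

    public static void main(String[] args) throws Exception {
        // "h\u00e9llo" is 6 bytes in UTF-8 but 5 characters, because \u00e9 takes two bytes
        byte[] data = "h\u00e9llo".getBytes("UTF-8");
        System.out.println(data.length + " bytes, " + codePointCount(data) + " characters");
    }
}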

Example 48 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The GenericUDFInBloomFilter class, evaluate method:

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Return if either of the arguments is null
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }
    if (!initializedBloomFilter) {
        // Setup the bloom filter once
        try {
            BytesWritable bw = (BytesWritable) arguments[1].get();
            byte[] bytes = new byte[bw.getLength()];
            System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
            bloomFilter = BloomFilter.deserialize(new ByteArrayInputStream(bytes));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        initializedBloomFilter = true;
    }
    // Check if the value is in bloom filter
    switch(((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
        case BOOLEAN:
            boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vBoolean ? 1 : 0);
        case BYTE:
            byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vByte);
        case SHORT:
            short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vShort);
        case INT:
            int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vInt);
        case LONG:
            long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testLong(vLong);
        case FLOAT:
            float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vFloat);
        case DOUBLE:
            double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
            return bloomFilter.testDouble(vDouble);
        case DECIMAL:
            HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            int startIdx = vDecimal.toBytes(scratchBuffer);
            return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
        case DATE:
            DateWritable vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testLong(vDate.getDays());
        case TIMESTAMP:
            Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
            return bloomFilter.testLong(vTimeStamp.getTime());
        case CHAR:
            Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
            return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
        case VARCHAR:
            Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
            return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
        case STRING:
            Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
        case BINARY:
            BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
            return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
        default:
            throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), Timestamp(java.sql.Timestamp), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable), UDFArgumentTypeException(org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException), BytesWritable(org.apache.hadoop.io.BytesWritable), Text(org.apache.hadoop.io.Text), IOException(java.io.IOException), ByteArrayInputStream(java.io.ByteArrayInputStream)
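
For context, the BytesWritable consumed above is normally produced by serializing a Bloom filter on the sending side. A minimal sketch of that producing side, assuming Hive's org.apache.hive.common.util.BloomFilter with a serialize(OutputStream, BloomFilter) counterpart to the deserialize call in the example and a constructor taking the expected number of entries:

import java.io.ByteArrayOutputStream;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.BloomFilter;

public class BloomFilterToWritableSketch {

    public static void main(String[] args) throws Exception {
        // Build a small filter and add a couple of keys
        BloomFilter bloomFilter = new BloomFilter(1000);
        bloomFilter.addLong(42L);
        bloomFilter.addLong(4711L);

        // Serialize it and wrap the bytes in a BytesWritable, the reverse of
        // BloomFilter.deserialize(new ByteArrayInputStream(bytes)) in the UDF above
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BloomFilter.serialize(out, bloomFilter);
        BytesWritable bw = new BytesWritable(out.toByteArray());

        System.out.println("serialized filter: " + bw.getLength() + " bytes");
        System.out.println("contains 42: " + bloomFilter.testLong(42L));
    }
}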

Example 49 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The GenericUDFSha2 class, evaluate method:

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (digest == null) {
        return null;
    }
    digest.reset();
    if (isStr) {
        Text n = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
        if (n == null) {
            return null;
        }
        digest.update(n.getBytes(), 0, n.getLength());
    } else {
        BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
        if (bWr == null) {
            return null;
        }
        digest.update(bWr.getBytes(), 0, bWr.getLength());
    }
    byte[] resBin = digest.digest();
    String resStr = Hex.encodeHexString(resBin);
    output.set(resStr);
    return output;
}
Also used : Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable)
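
The digest-over-valid-bytes pattern can be reproduced outside the UDF. A minimal sketch using java.security.MessageDigest and Apache Commons Codec, mirroring the getBytes()/getLength() handling above (the input value is illustrative):

import java.security.MessageDigest;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.io.BytesWritable;

public class Sha256OfWritableSketch {

    public static void main(String[] args) throws Exception {
        BytesWritable bWr = new BytesWritable("hello".getBytes("UTF-8"));

        // Digest only the valid portion of the backing array, as the UDF does
        MessageDigest digest = MessageDigest.getInstance("SHA-256");
        digest.update(bWr.getBytes(), 0, bWr.getLength());

        System.out.println(Hex.encodeHexString(digest.digest()));
    }
}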

Example 50 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The GenericUDFOctetLength class, evaluate method:

@Override
public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
    byte[] data = null;
    if (isInputString) {
        String val = null;
        if (arguments[0] != null) {
            val = (String) stringConverter.convert(arguments[0].get());
        }
        if (val == null) {
            return null;
        }
        data = val.getBytes();
    } else {
        BytesWritable val = null;
        if (arguments[0] != null) {
            val = (BytesWritable) arguments[0].get();
        }
        if (val == null) {
            return null;
        }
        data = val.getBytes();
    }
    result.set(data.length);
    return result;
}
Also used : BytesWritable(org.apache.hadoop.io.BytesWritable)
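
One BytesWritable detail worth keeping in mind here: getBytes() returns the backing array, which can be larger than the logical content, so the number of valid octets is getLength() rather than the array length. A minimal sketch showing the difference after a resize (the values are only meant to illustrate the API contract):

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableLengthSketch {

    public static void main(String[] args) throws Exception {
        BytesWritable val = new BytesWritable("abcdef".getBytes("UTF-8"));
        // Shrink the logical length; the backing array is not reallocated
        val.setSize(3);

        // getLength() reports the valid octets, getBytes().length the capacity
        System.out.println("valid octets: " + val.getLength());
        System.out.println("backing array: " + val.getBytes().length);
    }
}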

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable): 245 uses
Text (org.apache.hadoop.io.Text): 63 uses
Test (org.junit.Test): 51 uses
LongWritable (org.apache.hadoop.io.LongWritable): 46 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 45 uses
IntWritable (org.apache.hadoop.io.IntWritable): 36 uses
IOException (java.io.IOException): 34 uses
Path (org.apache.hadoop.fs.Path): 34 uses
ArrayList (java.util.ArrayList): 29 uses
Configuration (org.apache.hadoop.conf.Configuration): 28 uses
Writable (org.apache.hadoop.io.Writable): 27 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 25 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 24 uses
Random (java.util.Random): 23 uses
List (java.util.List): 22 uses
SequenceFile (org.apache.hadoop.io.SequenceFile): 22 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 21 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 19 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 18 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 17 uses