Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
Class CreateSequenceFile, method main.
public static void main(String[] args) throws Exception {
  // Read parameters
  int lines = 10;
  List<String> extraArgs = new ArrayList<String>();
  for (int ai = 0; ai < args.length; ai++) {
    if (args[ai].equals("-line") && ai + 1 < args.length) {
      lines = Integer.parseInt(args[ai + 1]);
      ai++;
    } else {
      extraArgs.add(args[ai]);
    }
  }
  if (extraArgs.size() != 1) {
    usage();
  }
  JobConf conf = new JobConf(CreateSequenceFile.class);
  ThriftSerializer serializer = new ThriftSerializer();
  // Open files
  SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
      new Path(extraArgs.get(0)), BytesWritable.class, BytesWritable.class);
  // write to file
  BytesWritable key = new BytesWritable();
  Random rand = new Random(20081215);
  for (int i = 0; i < lines; i++) {
    ArrayList<Integer> alist = new ArrayList<Integer>();
    alist.add(i);
    alist.add(i * 2);
    alist.add(i * 3);
    ArrayList<String> slist = new ArrayList<String>();
    slist.add("" + i * 10);
    slist.add("" + i * 100);
    slist.add("" + i * 1000);
    ArrayList<IntString> islist = new ArrayList<IntString>();
    islist.add(new IntString(i * i, "" + i * i * i, i));
    HashMap<String, String> hash = new HashMap<String, String>();
    hash.put("key_" + i, "value_" + i);
    Map<String, Map<String, Map<String, PropValueUnion>>> unionMap =
        new HashMap<String, Map<String, Map<String, PropValueUnion>>>();
    Map<String, Map<String, PropValueUnion>> erMap = new HashMap<String, Map<String, PropValueUnion>>();
    Map<String, PropValueUnion> attrMap = new HashMap<String, PropValueUnion>();
    erMap.put("erVal" + i, attrMap);
    attrMap.put("value_" + i, PropValueUnion.doubleValue(1.0));
    unionMap.put("key_" + i, erMap);
    Complex complex = new Complex(rand.nextInt(), "record_" + (new Integer(i)).toString(),
        alist, slist, islist, hash, unionMap, PropValueUnion.stringValue("test" + i),
        PropValueUnion.unionMStringString(hash), PropValueUnion.lString(slist));
    Writable value = serializer.serialize(complex);
    writer.append(key, value);
  }
  // Add an all-null record
  Complex complex = new Complex(0, null, null, null, null, null, null, null, null, null);
  Writable value = serializer.serialize(complex);
  writer.append(key, value);
  // Close files
  writer.close();
}
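For context (not part of the Hive source): a minimal sketch of reading the BytesWritable key/value pairs written above, using Hadoop's SequenceFile.Reader. The class name and the use of args[0] as the input path are placeholders; each value holds a Thrift-serialized Complex record, which this sketch only measures rather than deserializes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadSequenceFileSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Placeholder path: the file produced by CreateSequenceFile
    Path path = new Path(args[0]);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      BytesWritable key = new BytesWritable();
      BytesWritable value = new BytesWritable();
      int records = 0;
      while (reader.next(key, value)) {
        // Report only the size of each Thrift-serialized value
        System.out.println("record " + records++ + ": " + value.getLength() + " bytes");
      }
    } finally {
      reader.close();
    }
  }
}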
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
Class GenericUDFLength, method evaluate.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  byte[] data = null;
  if (isInputString) {
    String val = null;
    if (arguments[0] != null) {
      val = (String) stringConverter.convert(arguments[0].get());
    }
    if (val == null) {
      return null;
    }
    data = val.getBytes();
    // Count only UTF-8 start bytes so each multi-byte character contributes 1 to the length
    int len = 0;
    for (int i = 0; i < data.length; i++) {
      if (GenericUDFUtils.isUtfStartByte(data[i])) {
        len++;
      }
    }
    result.set(len);
    return result;
  } else {
    BytesWritable val = null;
    if (arguments[0] != null) {
      val = (BytesWritable) binaryConverter.convert(arguments[0].get());
    }
    if (val == null) {
      return null;
    }
    // For binary input the length is simply the number of valid bytes
    result.set(val.getLength());
    return result;
  }
}
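A standalone sketch (not from Hive) of the string branch above: counting UTF-8 start bytes makes each multi-byte character count once. It assumes GenericUDFUtils.isUtfStartByte simply rejects UTF-8 continuation bytes (bit pattern 10xxxxxx) and inlines that check.

import java.nio.charset.StandardCharsets;

public class Utf8LengthSketch {
  // Assumed equivalent of GenericUDFUtils.isUtfStartByte: continuation bytes look like 10xxxxxx
  private static boolean isUtfStartByte(byte b) {
    return (b & 0xC0) != 0x80;
  }

  public static void main(String[] args) {
    String s = "héllo"; // 5 characters
    byte[] data = s.getBytes(StandardCharsets.UTF_8); // 6 bytes, since 'é' encodes to 2 bytes
    int len = 0;
    for (byte b : data) {
      if (isUtfStartByte(b)) {
        len++;
      }
    }
    System.out.println("bytes=" + data.length + ", chars=" + len); // bytes=6, chars=5
  }
}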
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
Class GenericUDFInBloomFilter, method evaluate.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  // Return if either of the arguments is null
  if (arguments[0].get() == null || arguments[1].get() == null) {
    return null;
  }
  if (!initializedBloomFilter) {
    // Setup the bloom filter once
    try {
      BytesWritable bw = (BytesWritable) arguments[1].get();
      byte[] bytes = new byte[bw.getLength()];
      System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
      bloomFilter = BloomFilter.deserialize(new ByteArrayInputStream(bytes));
    } catch (IOException e) {
      throw new HiveException(e);
    }
    initializedBloomFilter = true;
  }
  // Check if the value is in bloom filter
  switch (((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
    case BOOLEAN:
      boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vBoolean ? 1 : 0);
    case BYTE:
      byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vByte);
    case SHORT:
      short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vShort);
    case INT:
      int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vInt);
    case LONG:
      long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vLong);
    case FLOAT:
      float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vFloat);
    case DOUBLE:
      double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vDouble);
    case DECIMAL:
      HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get());
      int startIdx = vDecimal.toBytes(scratchBuffer);
      return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
    case DATE:
      DateWritable vDate = ((DateObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testLong(vDate.getDays());
    case TIMESTAMP:
      Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector)
          .getPrimitiveJavaObject(arguments[0].get());
      return bloomFilter.testLong(vTimeStamp.getTime());
    case CHAR:
      Text vChar = ((HiveCharObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
      return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
    case VARCHAR:
      Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get()).getTextValue();
      return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
    case STRING:
      Text vString = ((StringObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
    case BINARY:
      BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector)
          .getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
    default:
      throw new UDFArgumentTypeException(0, "Bad primitive category " +
          ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
  }
}
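The filter reaches this UDF as a BytesWritable, so the code copies exactly getLength() bytes before deserializing. The sketch below (not from Hive) shows that round trip with the same BloomFilter class; the single-argument constructor (expected entry count) and the static BloomFilter.serialize counterpart to deserialize are assumptions that may differ between Hive versions.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.BloomFilter;

public class BloomFilterRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // Build a small filter and add a couple of long keys (assumed ctor: expected entry count)
    BloomFilter original = new BloomFilter(1000);
    original.addLong(42L);
    original.addLong(7L);

    // Serialize it and carry the bytes in a BytesWritable, as the runtime does for this UDF
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BloomFilter.serialize(out, original); // assumed static counterpart of deserialize
    BytesWritable bw = new BytesWritable(out.toByteArray());

    // Deserialize the same way as GenericUDFInBloomFilter: copy only getLength() bytes,
    // because getBytes() exposes a backing array that can be longer than the valid data
    byte[] bytes = new byte[bw.getLength()];
    System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
    BloomFilter restored = BloomFilter.deserialize(new ByteArrayInputStream(bytes));

    System.out.println(restored.testLong(42L));    // true
    System.out.println(restored.testLong(12345L)); // false, barring a false positive
  }
}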
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
Class GenericUDFSha2, method evaluate.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (digest == null) {
    return null;
  }
  digest.reset();
  if (isStr) {
    Text n = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
    if (n == null) {
      return null;
    }
    digest.update(n.getBytes(), 0, n.getLength());
  } else {
    BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
    if (bWr == null) {
      return null;
    }
    digest.update(bWr.getBytes(), 0, bWr.getLength());
  }
  byte[] resBin = digest.digest();
  String resStr = Hex.encodeHexString(resBin);
  output.set(resStr);
  return output;
}
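Stripped of the UDF plumbing, the hashing here is plain java.security.MessageDigest plus commons-codec hex encoding applied to the valid region of the BytesWritable, as in this standalone sketch (not from Hive).

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.io.BytesWritable;

public class Sha2DigestSketch {
  public static void main(String[] args) throws Exception {
    BytesWritable bWr = new BytesWritable("hello".getBytes(StandardCharsets.UTF_8));

    MessageDigest digest = MessageDigest.getInstance("SHA-256");
    digest.reset();
    // Hash only the valid region: getBytes() may return a buffer larger than getLength()
    digest.update(bWr.getBytes(), 0, bWr.getLength());

    String resStr = Hex.encodeHexString(digest.digest());
    System.out.println(resStr); // 64 hex characters for SHA-256
  }
}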
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
Class GenericUDFOctetLength, method evaluate.
@Override
public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
  byte[] data = null;
  if (isInputString) {
    String val = null;
    if (arguments[0] != null) {
      val = (String) stringConverter.convert(arguments[0].get());
    }
    if (val == null) {
      return null;
    }
    data = val.getBytes();
  } else {
    BytesWritable val = null;
    if (arguments[0] != null) {
      val = (BytesWritable) arguments[0].get();
    }
    if (val == null) {
      return null;
    }
    data = val.getBytes();
  }
  result.set(data.length);
  return result;
}