Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
Class VectorExtractRow, method extractRowColumn.
/**
* Extract a row's column object from the ColumnVector at batchIndex in the VectorizedRowBatch.
*
* @param batch the VectorizedRowBatch to read from
* @param batchIndex the row position within the batch
* @param logicalColumnIndex the logical (projected) column to extract
* @return the column value as a reusable Writable, or null if the entry is null
*/
public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) {
final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
ColumnVector colVector = batch.cols[projectionColumnNum];
if (colVector == null) {
// The planner does not include unneeded columns for reading, but other parts of execution may ask for them.
return null;
}
int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
return null;
}
Category category = categories[logicalColumnIndex];
switch(category) {
case PRIMITIVE:
{
Writable primitiveWritable = primitiveWritables[logicalColumnIndex];
PrimitiveCategory primitiveCategory = primitiveCategories[logicalColumnIndex];
switch(primitiveCategory) {
case VOID:
return null;
case BOOLEAN:
((BooleanWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex] == 0 ? false : true);
return primitiveWritable;
case BYTE:
((ByteWritable) primitiveWritable).set((byte) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case SHORT:
((ShortWritable) primitiveWritable).set((short) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case INT:
((IntWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case LONG:
((LongWritable) primitiveWritable).set(((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case TIMESTAMP:
((TimestampWritable) primitiveWritable).set(((TimestampColumnVector) batch.cols[projectionColumnNum]).asScratchTimestamp(adjustedIndex));
return primitiveWritable;
case DATE:
((DateWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case FLOAT:
((FloatWritable) primitiveWritable).set((float) ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case DOUBLE:
((DoubleWritable) primitiveWritable).set(((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case BINARY:
{
BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
byte[] bytes = bytesColVector.vector[adjustedIndex];
int start = bytesColVector.start[adjustedIndex];
int length = bytesColVector.length[adjustedIndex];
if (bytes == null) {
LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
}
BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
bytesWritable.set(bytes, start, length);
return primitiveWritable;
}
case STRING:
{
BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
byte[] bytes = bytesColVector.vector[adjustedIndex];
int start = bytesColVector.start[adjustedIndex];
int length = bytesColVector.length[adjustedIndex];
if (bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
}
// Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
((Text) primitiveWritable).set(bytes, start, length);
return primitiveWritable;
}
case VARCHAR:
{
BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
byte[] bytes = bytesColVector.vector[adjustedIndex];
int start = bytesColVector.start[adjustedIndex];
int length = bytesColVector.length[adjustedIndex];
if (bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
}
int adjustedLength = StringExpr.truncate(bytes, start, length, maxLengths[logicalColumnIndex]);
HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable;
hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
return primitiveWritable;
}
case CHAR:
{
BytesColumnVector bytesColVector = ((BytesColumnVector) batch.cols[projectionColumnNum]);
byte[] bytes = bytesColVector.vector[adjustedIndex];
int start = bytesColVector.start[adjustedIndex];
int length = bytesColVector.length[adjustedIndex];
if (bytes == null) {
nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
}
int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLengths[logicalColumnIndex]);
HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable;
hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLengths[logicalColumnIndex]);
return primitiveWritable;
}
case DECIMAL:
// The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
((HiveDecimalWritable) primitiveWritable).set(((DecimalColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case INTERVAL_YEAR_MONTH:
((HiveIntervalYearMonthWritable) primitiveWritable).set((int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]);
return primitiveWritable;
case INTERVAL_DAY_TIME:
((HiveIntervalDayTimeWritable) primitiveWritable).set(((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).asScratchIntervalDayTime(adjustedIndex));
return primitiveWritable;
default:
throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported");
}
}
default:
throw new RuntimeException("Category " + category.name() + " not supported");
}
}
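To make the SHORT branch above concrete, here is a minimal, self-contained sketch of the same pattern: a SHORT column is stored in a long-backed LongColumnVector, and after the repeating/null checks the value is narrowed into a reusable ShortWritable. The batch setup and the class name ShortColumnSketch are made up for illustration; they are not taken from VectorExtractRow.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class ShortColumnSketch {
    public static void main(String[] args) {
        // One-column batch whose SHORT values live in a long-backed vector.
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        batch.cols[0] = new LongColumnVector();
        LongColumnVector col = (LongColumnVector) batch.cols[0];
        col.vector[0] = 42L;
        batch.size = 1;

        // Reuse one writable across rows, as extractRowColumn does with primitiveWritables.
        ShortWritable writable = new ShortWritable();
        int batchIndex = 0;
        int adjustedIndex = col.isRepeating ? 0 : batchIndex;
        if (!col.noNulls && col.isNull[adjustedIndex]) {
            System.out.println("NULL");
        } else {
            writable.set((short) col.vector[adjustedIndex]);
            System.out.println(writable.get());  // prints 42
        }
    }
}

The narrowing cast mirrors how vectorized execution keeps every integer-family type in a long vector and only re-applies the declared type when a row is extracted.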
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
Class TestOrcFile, method testSeek.
@Test
public void testSeek() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(200000).bufferSize(65536).rowIndexStride(1000));
Random rand = new Random(42);
final int COUNT = 32768;
long[] intValues = new long[COUNT];
double[] doubleValues = new double[COUNT];
String[] stringValues = new String[COUNT];
BytesWritable[] byteValues = new BytesWritable[COUNT];
String[] words = new String[128];
for (int i = 0; i < words.length; ++i) {
words[i] = Integer.toHexString(rand.nextInt());
}
for (int i = 0; i < COUNT / 2; ++i) {
intValues[2 * i] = rand.nextLong();
intValues[2 * i + 1] = intValues[2 * i];
stringValues[2 * i] = words[rand.nextInt(words.length)];
stringValues[2 * i + 1] = stringValues[2 * i];
}
for (int i = 0; i < COUNT; ++i) {
doubleValues[i] = rand.nextDouble();
byte[] buf = new byte[20];
rand.nextBytes(buf);
byteValues[i] = new BytesWritable(buf);
}
for (int i = 0; i < COUNT; ++i) {
writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
}
writer.close();
Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(COUNT, reader.getNumberOfRows());
RecordReader rows = reader.rows();
OrcStruct row = null;
for (int i = COUNT - 1; i >= 0; --i) {
// we load the previous buffer of rows
if (i % COUNT == COUNT - 1) {
rows.seekToRow(i - (COUNT - 1));
}
rows.seekToRow(i);
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) row.getFieldValue(0)).get());
assertEquals(expected.byte1.byteValue(), ((ByteWritable) row.getFieldValue(1)).get());
assertEquals(expected.short1.shortValue(), ((ShortWritable) row.getFieldValue(2)).get());
assertEquals(expected.int1.intValue(), ((IntWritable) row.getFieldValue(3)).get());
assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
assertEquals(expected.float1, ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
assertEquals(expected.double1, ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
assertEquals(expected.bytes1, row.getFieldValue(7));
assertEquals(expected.string1, row.getFieldValue(8));
List<InnerStruct> expectedList = expected.middle.list;
List<OrcStruct> actualList = (List<OrcStruct>) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
compareList(expectedList, actualList);
compareList(expected.list, (List<OrcStruct>) row.getFieldValue(10));
}
rows.close();
Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
long offsetOfStripe2 = 0;
long offsetOfStripe4 = 0;
long lastRowOfStripe2 = 0;
for (int i = 0; i < 5; ++i) {
StripeInformation stripe = stripeIterator.next();
if (i < 2) {
lastRowOfStripe2 += stripe.getNumberOfRows();
} else if (i == 2) {
offsetOfStripe2 = stripe.getOffset();
lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
} else if (i == 4) {
offsetOfStripe4 = stripe.getOffset();
}
}
boolean[] columns = new boolean[reader.getStatistics().length];
// long column
columns[5] = true;
// text column
columns[9] = true;
rows = reader.rowsOptions(new Reader.Options().range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2).include(columns));
rows.seekToRow(lastRowOfStripe2);
for (int i = 0; i < 2; ++i) {
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, (int) (lastRowOfStripe2 + i));
assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
assertEquals(expected.string1, row.getFieldValue(8));
}
rows.close();
}
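The seek behaviour this test exercises can be reduced to a much smaller sketch: write rows, open a RecordReader, and call seekToRow before next(). The single-field ShortRow class and the /tmp path below are hypothetical stand-ins for BigRow and testFilePath, assuming the same pre-split org.apache.hadoop.hive.ql.io.orc API used above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

public class OrcSeekSketch {
    // Hypothetical single-column row type used instead of BigRow.
    public static class ShortRow {
        public Short value;
        public ShortRow(Short value) { this.value = value; }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/seek-sketch.orc");  // assumed scratch location

        ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
            ShortRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        Writer writer = OrcFile.createWriter(path,
            OrcFile.writerOptions(conf).inspector(inspector).rowIndexStride(1000));
        for (short i = 0; i < 10000; i++) {
            writer.addRow(new ShortRow(i));
        }
        writer.close();

        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        rows.seekToRow(1234);  // jump straight to row 1234 before reading
        OrcStruct row = (OrcStruct) rows.next(null);
        System.out.println(((ShortWritable) row.getFieldValue(0)).get());  // 1234
        rows.close();
    }
}

The rowIndexStride setting matters here: it gives the reader row-group granularity to seek to, instead of having to decode from the start of a stripe.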
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
Class TestOrcFile, method testZeroCopySeek.
@Test
public void testZeroCopySeek() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
}
Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(200000).bufferSize(65536).rowIndexStride(1000));
Random rand = new Random(42);
final int COUNT = 32768;
long[] intValues = new long[COUNT];
double[] doubleValues = new double[COUNT];
String[] stringValues = new String[COUNT];
BytesWritable[] byteValues = new BytesWritable[COUNT];
String[] words = new String[128];
for (int i = 0; i < words.length; ++i) {
words[i] = Integer.toHexString(rand.nextInt());
}
for (int i = 0; i < COUNT / 2; ++i) {
intValues[2 * i] = rand.nextLong();
intValues[2 * i + 1] = intValues[2 * i];
stringValues[2 * i] = words[rand.nextInt(words.length)];
stringValues[2 * i + 1] = stringValues[2 * i];
}
for (int i = 0; i < COUNT; ++i) {
doubleValues[i] = rand.nextDouble();
byte[] buf = new byte[20];
rand.nextBytes(buf);
byteValues[i] = new BytesWritable(buf);
}
for (int i = 0; i < COUNT; ++i) {
writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
}
writer.close();
writer = null;
Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
assertEquals(COUNT, reader.getNumberOfRows());
/* enable zero copy record reader */
Configuration conf = new Configuration();
conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);
RecordReader rows = reader.rows();
/* all tests are identical to the other seek() tests */
OrcStruct row = null;
for (int i = COUNT - 1; i >= 0; --i) {
// we load the previous buffer of rows
if (i % COUNT == COUNT - 1) {
rows.seekToRow(i - (COUNT - 1));
}
rows.seekToRow(i);
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) row.getFieldValue(0)).get());
assertEquals(expected.byte1.byteValue(), ((ByteWritable) row.getFieldValue(1)).get());
assertEquals(expected.short1.shortValue(), ((ShortWritable) row.getFieldValue(2)).get());
assertEquals(expected.int1.intValue(), ((IntWritable) row.getFieldValue(3)).get());
assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
assertEquals(expected.float1.floatValue(), ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
assertEquals(expected.double1.doubleValue(), ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
assertEquals(expected.bytes1, row.getFieldValue(7));
assertEquals(expected.string1, row.getFieldValue(8));
List<InnerStruct> expectedList = expected.middle.list;
List<OrcStruct> actualList = (List) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
compareList(expectedList, actualList);
compareList(expected.list, (List) row.getFieldValue(10));
}
rows.close();
Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
long offsetOfStripe2 = 0;
long offsetOfStripe4 = 0;
long lastRowOfStripe2 = 0;
for (int i = 0; i < 5; ++i) {
StripeInformation stripe = stripeIterator.next();
if (i < 2) {
lastRowOfStripe2 += stripe.getNumberOfRows();
} else if (i == 2) {
offsetOfStripe2 = stripe.getOffset();
lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
} else if (i == 4) {
offsetOfStripe4 = stripe.getOffset();
}
}
boolean[] columns = new boolean[reader.getStatistics().length];
// long column
columns[5] = true;
// text column
columns[9] = true;
/* use zero copy record reader */
rows = reader.rowsOptions(new Reader.Options().range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2).include(columns));
rows.seekToRow(lastRowOfStripe2);
for (int i = 0; i < 2; ++i) {
row = (OrcStruct) rows.next(row);
BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, (int) (lastRowOfStripe2 + i));
assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
assertEquals(expected.string1, row.getFieldValue(8));
}
rows.close();
}
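For reference, a hedged sketch of how the zero-copy flag is normally wired in: the OrcConf.USE_ZEROCOPY key is set on the Configuration that is passed to OrcFile.readerOptions, so the record reader built from that reader can pick it up. The import assumes the OrcConf enum from the split-out orc-core module (org.apache.orc.OrcConf), and the file path is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.orc.OrcConf;  // assumed package for the OrcConf enum

public class ZeroCopyReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Ask ORC to use the zero-copy HDFS read path where the filesystem supports it.
        conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);

        Path path = new Path("/tmp/zero-copy-sketch.orc");  // hypothetical, written elsewhere
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        try {
            Object row = null;
            while (rows.hasNext()) {
                row = rows.next(row);
                // process the row here
            }
        } finally {
            rows.close();
        }
    }
}

Note that in the test above the new Configuration is created after the Reader, so whether zero copy is actually exercised depends on which configuration the reader holds; the sketch sets the flag up front to avoid that ambiguity.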
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
Class GenericUDFReflect2, method evaluate.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
Object targetObject = targetOI.getPrimitiveJavaObject(arguments[0].get());
if (targetObject == null) {
return null;
}
Object result = null;
try {
result = method.invoke(targetObject, setupParameters(arguments, 2));
} catch (InvocationTargetException e) {
throw new HiveException(e.getCause());
} catch (Exception e) {
throw new HiveException(e);
}
if (result == null) {
return null;
}
switch(returnOI.getPrimitiveCategory()) {
case VOID:
return null;
case BOOLEAN:
((BooleanWritable) returnObj).set((Boolean) result);
return returnObj;
case BYTE:
((ByteWritable) returnObj).set((Byte) result);
return returnObj;
case SHORT:
((ShortWritable) returnObj).set((Short) result);
return returnObj;
case INT:
((IntWritable) returnObj).set((Integer) result);
return returnObj;
case LONG:
((LongWritable) returnObj).set((Long) result);
return returnObj;
case FLOAT:
((FloatWritable) returnObj).set((Float) result);
return returnObj;
case DOUBLE:
((DoubleWritable) returnObj).set((Double) result);
return returnObj;
case STRING:
((Text) returnObj).set((String) result);
return returnObj;
case TIMESTAMP:
((TimestampWritable) returnObj).set((Timestamp) result);
return returnObj;
case BINARY:
((BytesWritable) returnObj).set((byte[]) result, 0, ((byte[]) result).length);
return returnObj;
case DECIMAL:
((HiveDecimalWritable) returnObj).set((HiveDecimal) result);
return returnObj;
}
throw new HiveException("Invalid type " + returnOI.getPrimitiveCategory());
}
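The SHORT case above relies on the reflective call returning a boxed Short, which is unboxed into a preallocated ShortWritable so that no writable is allocated per row. A small standalone sketch of that reuse pattern, using a made-up helper around Short.valueOf(String) as the reflected method (in the UDF itself the Method is resolved once during initialization):

import java.lang.reflect.Method;
import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class ReflectShortSketch {
    // Reused across calls, mirroring returnObj in GenericUDFReflect2.
    private static final ShortWritable RETURN_OBJ = new ShortWritable();

    public static ShortWritable invokeShortValue(String s) throws Exception {
        // Hypothetical target: the static method Short.valueOf(String), invoked reflectively.
        Method method = Short.class.getMethod("valueOf", String.class);
        Object result = method.invoke(null, s);
        if (result == null) {
            return null;
        }
        RETURN_OBJ.set((Short) result);  // unbox into the reusable writable
        return RETURN_OBJ;
    }

    public static void main(String[] args) throws Exception {
        System.out.println(invokeShortValue("123").get());  // prints 123
    }
}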
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache.
Class GenericUDFOPNegative, method evaluate.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
if (arguments[0] == null) {
return null;
}
Object input = arguments[0].get();
if (input == null) {
return null;
}
input = converter.convert(input);
if (input == null) {
return null;
}
switch(resultOI.getPrimitiveCategory()) {
case BYTE:
byteWritable.set((byte) -(((ByteWritable) input).get()));
return byteWritable;
case SHORT:
shortWritable.set((short) -(((ShortWritable) input).get()));
return shortWritable;
case INT:
intWritable.set(-(((IntWritable) input).get()));
return intWritable;
case LONG:
longWritable.set(-(((LongWritable) input).get()));
return longWritable;
case FLOAT:
floatWritable.set(-(((FloatWritable) input).get()));
return floatWritable;
case DOUBLE:
doubleWritable.set(-(((DoubleWritable) input).get()));
return doubleWritable;
case DECIMAL:
decimalWritable.set((HiveDecimalWritable) input);
decimalWritable.mutateNegate();
return decimalWritable;
case INTERVAL_YEAR_MONTH:
HiveIntervalYearMonth intervalYearMonth = ((HiveIntervalYearMonthWritable) input).getHiveIntervalYearMonth();
this.intervalYearMonthWritable.set(intervalYearMonth.negate());
return this.intervalYearMonthWritable;
case INTERVAL_DAY_TIME:
HiveIntervalDayTime intervalDayTime = ((HiveIntervalDayTimeWritable) input).getHiveIntervalDayTime();
this.intervalDayTimeWritable.set(intervalDayTime.negate());
return intervalDayTimeWritable;
default:
// Should never happen.
throw new RuntimeException("Unexpected type in evaluating " + opName + ": " + resultOI.getPrimitiveCategory());
}
}
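One subtlety in the SHORT branch above: the negation is done in int arithmetic and then cast back to short, so Short.MIN_VALUE negates to itself instead of widening to a larger type. A small standalone check of that behaviour, written against the same ShortWritable API:

import org.apache.hadoop.hive.serde2.io.ShortWritable;

public class NegateShortSketch {
    public static void main(String[] args) {
        ShortWritable in = new ShortWritable();
        in.set(Short.MIN_VALUE);
        ShortWritable out = new ShortWritable();

        // Same arithmetic as the SHORT case above: promote to int, negate, cast back to short.
        out.set((short) -in.get());

        // -(-32768) does not fit in a short, so the cast wraps back to -32768.
        System.out.println(out.get() == Short.MIN_VALUE);  // prints true
    }
}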