Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testCombinationInputFormatWithAcid.
// test non-vectorized, acid, combine
@Test
public void testCombinationInputFormatWithAcid() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  final int PARTITIONS = 2;
  final int BUCKETS = 3;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "combinationAcid", inspector, false, PARTITIONS);
  // write the orc file to the mock file system
  Path[] partDir = new Path[PARTITIONS];
  String[] paths = conf.getStrings("mapred.input.dir");
  for (int p = 0; p < PARTITIONS; ++p) {
    partDir[p] = new Path(paths[p]);
  }
  // write a base file in partition 0
  OrcRecordUpdater writer = new OrcRecordUpdater(partDir[0],
      new AcidOutputFormat.Options(conf).maximumTransactionId(10).writingBase(true)
          .bucket(0).inspector(inspector).finalDestination(partDir[0]));
  for (int i = 0; i < 10; ++i) {
    writer.insert(10, new MyRow(i, 2 * i));
  }
  writer.close(false);
  // base file
  Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
  setBlocks(base0, conf, new MockBlock("host1", "host2"));
  // write a second bucket file into the base of partition 0
  writer = new OrcRecordUpdater(partDir[0],
      new AcidOutputFormat.Options(conf).maximumTransactionId(10).writingBase(true)
          .bucket(1).inspector(inspector).finalDestination(partDir[0]));
  for (int i = 10; i < 20; ++i) {
    writer.insert(10, new MyRow(i, 2 * i));
  }
  writer.close(false);
  Path base1 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00001");
  setBlocks(base1, conf, new MockBlock("host1", "host2"));
  // write three files in partition 1
  for (int bucket = 0; bucket < BUCKETS; ++bucket) {
    Path path = new Path(partDir[1], "00000" + bucket + "_0");
    Writer orc = OrcFile.createWriter(path,
        OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    orc.addRow(new MyRow(1, 2));
    orc.close();
    setBlocks(path, conf, new MockBlock("host3", "host4"));
  }
  // call getSplits
  conf.setInt(hive_metastoreConstants.BUCKET_COUNT, BUCKETS);
  HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 1);
  assertEquals(3, splits.length);
  HiveInputFormat.HiveInputSplit split = (HiveInputFormat.HiveInputSplit) splits[0];
  assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
  assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString());
  assertEquals(0, split.getStart());
  assertEquals(607, split.getLength());
  split = (HiveInputFormat.HiveInputSplit) splits[1];
  assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", split.inputFormatClassName());
  assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString());
  assertEquals(0, split.getStart());
  assertEquals(629, split.getLength());
  CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
      (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
  assertEquals(BUCKETS, combineSplit.getNumPaths());
  for (int bucket = 0; bucket < BUCKETS; ++bucket) {
    assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
        combineSplit.getPath(bucket).toString());
    assertEquals(0, combineSplit.getOffset(bucket));
    assertEquals(241, combineSplit.getLength(bucket));
  }
  String[] hosts = combineSplit.getLocations();
  assertEquals(2, hosts.length);
}
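The test above drives everything off the reflection-based StructObjectInspector built for MyRow. As a point of reference, here is a minimal standalone sketch (hypothetical SimpleRow class, not part of the Hive test suite) of what such an inspector exposes for a plain Java class: the list of struct fields and each field's value for a given row, which is what the ORC record updater walks when it serializes the inserted rows.

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ReflectionInspectorSketch {
  // Hypothetical row class standing in for MyRow from the test.
  public static class SimpleRow {
    int x;
    int y;
    SimpleRow(int x, int y) { this.x = x; this.y = y; }
  }

  public static void main(String[] args) {
    // Same factory call as the test: derive a struct inspector from the class layout.
    StructObjectInspector inspector = (StructObjectInspector)
        ObjectInspectorFactory.getReflectionObjectInspector(
            SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    SimpleRow row = new SimpleRow(1, 2);
    // Walk the struct fields and pull each field's value out of the row object.
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    for (StructField field : fields) {
      System.out.println(field.getFieldName() + " ("
          + field.getFieldObjectInspector().getTypeName() + ") = "
          + inspector.getStructFieldData(row, field));
    }
  }
}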
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testVectorizationWithBuckets.
/**
* Test vectorization, non-acid, non-combine.
* @throws Exception
*/
@Test
public void testVectorizationWithBuckets() throws Exception {
  // get the object inspector for MyRow
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
      "vectorBuckets", inspector, true, 1);
  // write the orc file to the mock file system
  Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
  Writer writer = OrcFile.createWriter(path,
      OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
  for (int i = 0; i < 10; ++i) {
    writer.addRow(new MyRow(i, 2 * i));
  }
  writer.close();
  setBlocks(path, conf, new MockBlock("host0", "host1"));
  // call getSplits
  conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
  HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
  InputSplit[] splits = inputFormat.getSplits(conf, 10);
  assertEquals(1, splits.length);
  org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader =
      inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
  NullWritable key = reader.createKey();
  VectorizedRowBatch value = reader.createValue();
  assertEquals(true, reader.next(key, value));
  assertEquals(10, value.count());
  LongColumnVector col0 = (LongColumnVector) value.cols[0];
  for (int i = 0; i < 10; i++) {
    assertEquals("checking " + i, i, col0.vector[i]);
  }
  assertEquals(false, reader.next(key, value));
}
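The assertions above read col0.vector directly, which is safe here because the test writes a small dense batch with no nulls. A general consumer of the vectorized reader also has to honor the batch's selection vector and the column's isRepeating/isNull flags. The sketch below (hypothetical helper class, assuming column 0 is a LongColumnVector as in this test) shows that fuller iteration pattern.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchWalkSketch {
  // Print every value of column 0, following the VectorizedRowBatch conventions.
  static void printFirstColumn(VectorizedRowBatch batch) {
    LongColumnVector col0 = (LongColumnVector) batch.cols[0];
    for (int i = 0; i < batch.size; ++i) {
      // Respect the optional selection vector.
      int row = batch.selectedInUse ? batch.selected[i] : i;
      // A repeating vector keeps its single value in slot 0.
      int valueIndex = col0.isRepeating ? 0 : row;
      if (!col0.noNulls && col0.isNull[valueIndex]) {
        System.out.println("row " + row + ": null");
      } else {
        System.out.println("row " + row + ": " + col0.vector[valueIndex]);
      }
    }
  }
}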
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class TestOrcFile, method testHiveDecimalAllNulls.
@Test
public void testHiveDecimalAllNulls() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        DecimalStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
  // this is an invalid decimal value, getting HiveDecimal from it will return null
  writer.addRow(new DecimalStruct(new HiveDecimalWritable("1.463040009E9".getBytes(), 8)));
  writer.addRow(new DecimalStruct(null));
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
  StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
  List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
  HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector)
      readerInspector.getStructFieldRef("dec").getFieldObjectInspector();
  RecordReader rows = reader.rows();
  while (rows.hasNext()) {
    Object row = rows.next(null);
    assertEquals(null, doi.getPrimitiveWritableObject(
        readerInspector.getStructFieldData(row, fields.get(0))));
  }
  // check the stats
  ColumnStatistics[] stats = reader.getStatistics();
  assertEquals(2, stats[0].getNumberOfValues());
  assertEquals(0, stats[1].getNumberOfValues());
  assertEquals(true, stats[1].hasNull());
}
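The read side of this test shows the usual ORC pattern: the reader supplies its own StructObjectInspector, and callers walk rows and fields through it. Below is a minimal standalone sketch of that pattern (hypothetical class name and command-line path); the API calls mirror the ones used in the test: getObjectInspector, rows, hasNext, next, getAllStructFieldRefs and getStructFieldData.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class OrcDumpSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // args[0] is a hypothetical path to an ORC file on an accessible file system.
    Reader reader = OrcFile.createReader(new Path(args[0]), OrcFile.readerOptions(conf));
    StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    RecordReader rows = reader.rows();
    Object row = null;
    while (rows.hasNext()) {
      // Reuse the previous row object, as the ORC RecordReader allows.
      row = rows.next(row);
      for (StructField field : fields) {
        System.out.print(field.getFieldName() + "="
            + inspector.getStructFieldData(row, field) + " ");
      }
      System.out.println();
    }
    rows.close();
  }
}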
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class SerDeUtils, method buildJSONString.
static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, String nullStr) {
  switch (oi.getCategory()) {
    case PRIMITIVE: {
      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        switch (poi.getPrimitiveCategory()) {
          case BOOLEAN: {
            boolean b = ((BooleanObjectInspector) poi).get(o);
            sb.append(b ? "true" : "false");
            break;
          }
          case BYTE: {
            sb.append(((ByteObjectInspector) poi).get(o));
            break;
          }
          case SHORT: {
            sb.append(((ShortObjectInspector) poi).get(o));
            break;
          }
          case INT: {
            sb.append(((IntObjectInspector) poi).get(o));
            break;
          }
          case LONG: {
            sb.append(((LongObjectInspector) poi).get(o));
            break;
          }
          case FLOAT: {
            sb.append(((FloatObjectInspector) poi).get(o));
            break;
          }
          case DOUBLE: {
            sb.append(((DoubleObjectInspector) poi).get(o));
            break;
          }
          case STRING: {
            sb.append('"');
            sb.append(escapeString(((StringObjectInspector) poi).getPrimitiveJavaObject(o)));
            sb.append('"');
            break;
          }
          case CHAR: {
            sb.append('"');
            sb.append(escapeString(((HiveCharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
            sb.append('"');
            break;
          }
          case VARCHAR: {
            sb.append('"');
            sb.append(escapeString(((HiveVarcharObjectInspector) poi).getPrimitiveJavaObject(o).toString()));
            sb.append('"');
            break;
          }
          case DATE: {
            sb.append('"');
            sb.append(((DateObjectInspector) poi).getPrimitiveWritableObject(o));
            sb.append('"');
            break;
          }
          case TIMESTAMP: {
            sb.append('"');
            sb.append(((TimestampObjectInspector) poi).getPrimitiveWritableObject(o));
            sb.append('"');
            break;
          }
          case BINARY: {
            BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
            Text txt = new Text();
            txt.set(bw.getBytes(), 0, bw.getLength());
            sb.append(txt.toString());
            break;
          }
          case DECIMAL: {
            sb.append(((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o));
            break;
          }
          case INTERVAL_YEAR_MONTH: {
            sb.append(((HiveIntervalYearMonthObjectInspector) oi).getPrimitiveJavaObject(o));
            break;
          }
          case INTERVAL_DAY_TIME: {
            sb.append(((HiveIntervalDayTimeObjectInspector) oi).getPrimitiveJavaObject(o));
            break;
          }
          default:
            throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory());
        }
      }
      break;
    }
    case LIST: {
      ListObjectInspector loi = (ListObjectInspector) oi;
      ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
      List<?> olist = loi.getList(o);
      if (olist == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACKET);
        for (int i = 0; i < olist.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          buildJSONString(sb, olist.get(i), listElementObjectInspector, JSON_NULL);
        }
        sb.append(RBRACKET);
      }
      break;
    }
    case MAP: {
      MapObjectInspector moi = (MapObjectInspector) oi;
      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
      ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
      Map<?, ?> omap = moi.getMap(o);
      if (omap == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        boolean first = true;
        for (Object entry : omap.entrySet()) {
          if (first) {
            first = false;
          } else {
            sb.append(COMMA);
          }
          Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
          buildJSONString(sb, e.getKey(), mapKeyObjectInspector, JSON_NULL);
          sb.append(COLON);
          buildJSONString(sb, e.getValue(), mapValueObjectInspector, JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case STRUCT: {
      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
      if (o == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        for (int i = 0; i < structFields.size(); i++) {
          if (i > 0) {
            sb.append(COMMA);
          }
          sb.append(QUOTE);
          sb.append(structFields.get(i).getFieldName());
          sb.append(QUOTE);
          sb.append(COLON);
          buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
              structFields.get(i).getFieldObjectInspector(), JSON_NULL);
        }
        sb.append(RBRACE);
      }
      break;
    }
    case UNION: {
      UnionObjectInspector uoi = (UnionObjectInspector) oi;
      if (o == null) {
        sb.append(nullStr);
      } else {
        sb.append(LBRACE);
        sb.append(uoi.getTag(o));
        sb.append(COLON);
        buildJSONString(sb, uoi.getField(o),
            uoi.getObjectInspectors().get(uoi.getTag(o)), JSON_NULL);
        sb.append(RBRACE);
      }
      break;
    }
    default:
      throw new RuntimeException("Unknown type in ObjectInspector!");
  }
}
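buildJSONString is the package-private workhorse; callers normally reach it through the public SerDeUtils.getJSONString entry point, which pairs an arbitrary object with a matching ObjectInspector and returns the JSON text. A minimal sketch follows (hypothetical Point class; the expected output is shown as a comment): the STRUCT branch above emits the quoted field names, and the PRIMITIVE branch formats each value.

import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

public class JsonStringSketch {
  // Hypothetical row type: one numeric and one string field.
  public static class Point {
    public int x = 3;
    public String label = "origin";
  }

  public static void main(String[] args) {
    // Build a reflection-based struct inspector for the row class.
    ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
        Point.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    // Expected shape: {"x":3,"label":"origin"}
    System.out.println(SerDeUtils.getJSONString(new Point(), inspector));
  }
}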
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
From the class BinarySortableSerDe, method serialize.
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  output.reset();
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  for (int i = 0; i < columnNames.size(); i++) {
    serialize(output, soi.getStructFieldData(obj, fields.get(i)),
        fields.get(i).getFieldObjectInspector(),
        columnSortOrderIsDesc[i], columnNullMarker[i], columnNotNullMarker[i]);
  }
  serializeBytesWritable.set(output.getData(), 0, output.getLength());
  return serializeBytesWritable;
}
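To drive this serialize method, BinarySortableSerDe is first initialized from table properties so that columnNames, columnSortOrderIsDesc and the null markers are populated, and the row is then paired with a StructObjectInspector whose field layout matches those columns. The sketch below uses hypothetical column names and the classic two-argument initialize(Configuration, Properties) overload; exact initialization signatures vary slightly across Hive releases.

import java.util.Arrays;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;

public class BinarySortableSketch {
  public static void main(String[] args) throws Exception {
    // Table-level properties: column names and types drive the serde's field layout.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    BinarySortableSerDe serde = new BinarySortableSerDe();
    serde.initialize(new Configuration(), props);

    // The row is a List paired with a standard struct inspector over Java-primitive
    // field inspectors, so serialize() can walk it field by field.
    ObjectInspector rowInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.asList(
            (ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    Object row = Arrays.asList((Object) 7, "alpha");

    BytesWritable bytes = (BytesWritable) serde.serialize(row, rowInspector);
    System.out.println("serialized length: " + bytes.getLength());
  }
}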