use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class TestBinarySortableFast method testBinarySortableFast.
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
int rowCount = rows.length;
int columnCount = primitiveTypeInfos.length;
boolean[] columnsToInclude = null;
if (useIncludeColumns) {
columnsToInclude = new boolean[columnCount];
for (int i = 0; i < columnCount; i++) {
columnsToInclude[i] = r.nextBoolean();
}
}
int writeColumnCount = columnCount;
if (doWriteFewerColumns) {
writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
}
BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
// Try to serialize
// One Writable per row.
BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
int[][] perFieldWriteLengthsArray = new int[rowCount][];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
Output output = new Output();
binarySortableSerializeWrite.set(output);
int[] perFieldWriteLengths = new int[columnCount];
for (int index = 0; index < writeColumnCount; index++) {
Writable writable = (Writable) row[index];
VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
perFieldWriteLengths[index] = output.getLength();
}
perFieldWriteLengthsArray[i] = perFieldWriteLengths;
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(output.getData(), 0, output.getLength());
serializeWriteBytes[i] = bytesWritable;
if (i > 0) {
int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
}
}
}
// Try to deserialize, using DeserializeRead, the Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
BytesWritable bytesWritable = serializeWriteBytes[i];
binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back as null.
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
}
/*
 * Clip off one byte and expect an EOFException when reading the last written field.
 */
BinarySortableDeserializeRead binarySortableDeserializeRead2 = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
// One fewer byte.
binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1);
for (int index = 0; index < writeColumnCount; index++) {
Writable writable = (Writable) row[index];
if (index == writeColumnCount - 1) {
boolean threw = false;
try {
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
} catch (EOFException e) {
// debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
// debugStackTrace = e.getStackTrace();
threw = true;
}
TestCase.assertTrue(threw);
} else {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead2.skipNextField();
} else {
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
}
}
}
}
// Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
// Note that regular SerDe doesn't tolerate fewer columns.
List<Object> deserializedRow;
if (doWriteFewerColumns) {
deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
} else {
deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
}
Object[] row = rows[i];
for (int index = 0; index < writeColumnCount; index++) {
Object expected = row[index];
Object object = deserializedRow.get(index);
if (expected == null || object == null) {
if (expected != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
if (!object.equals(expected)) {
fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
}
}
}
}
// One Writable per row.
BytesWritable[] serdeBytes = new BytesWritable[rowCount];
// Serialize using the SerDe; the result is deserialized below using DeserializeRead.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
// Since SerDe reuses memory, we will need to make a copy.
BytesWritable serialized;
if (doWriteFewerColumns) {
serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
} else {
serialized = (BytesWritable) serde.serialize(row, rowOI);
}
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(serialized);
byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
int mismatchPos = -1;
if (serDeOutput.length != serializeWriteExpected.length) {
for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
if (serDeOutput[b] != serializeWriteExpected[b]) {
mismatchPos = b;
break;
}
}
fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
}
List<Integer> differentPositions = new ArrayList<Integer>();
for (int b = 0; b < serDeOutput.length; b++) {
if (serDeOutput[b] != serializeWriteExpected[b]) {
differentPositions.add(b);
}
}
if (differentPositions.size() > 0) {
List<String> serializeWriteExpectedFields = new ArrayList<String>();
List<String> serDeFields = new ArrayList<String>();
int f = 0;
int lastBegin = 0;
for (int b = 0; b < serDeOutput.length; b++) {
int writeLength = perFieldWriteLengthsArray[i][f];
if (b + 1 == writeLength) {
serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
f++;
lastBegin = b + 1;
}
}
fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(primitiveTypeInfos) + "\nrow " + Arrays.toString(row));
}
}
serdeBytes[i] = bytesWritable;
}
// Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
BytesWritable bytesWritable = serdeBytes[i];
binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
binarySortableDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back as null.
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
}
}
}
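For reference, a minimal sketch of the round trip this test exercises, reduced to a single two-column row. It reuses only classes and helper signatures visible in the test above (BinarySortableSerializeWrite, BinarySortableDeserializeRead, VerifyFast, BinarySortableSerDe markers); the column types, the IntWritable/Text sample values, and the assumption that it runs with the same imports and package visibility as the test are illustrative, not part of the test itself.
// Sketch only: assumes the test's imports plus org.apache.hadoop.io.IntWritable and org.apache.hadoop.io.Text.
PrimitiveTypeInfo[] typeInfos = { TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo };
boolean[] sortOrderIsDesc = { false, false };
byte[] nullMarker = { BinarySortableSerDe.ZERO, BinarySortableSerDe.ZERO };
byte[] notNullMarker = { BinarySortableSerDe.ONE, BinarySortableSerDe.ONE };
// Serialize one row of Writables into an Output buffer and copy it into a BytesWritable.
BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(sortOrderIsDesc, nullMarker, notNullMarker);
Output output = new Output();
serializeWrite.set(output);
VerifyFast.serializeWrite(serializeWrite, typeInfos[0], new IntWritable(42));
VerifyFast.serializeWrite(serializeWrite, typeInfos[1], new Text("hello"));
BytesWritable bytes = new BytesWritable();
bytes.set(output.getData(), 0, output.getLength());
// Deserialize the same buffer and verify each field against the original Writable.
BinarySortableDeserializeRead deserializeRead = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false, sortOrderIsDesc);
deserializeRead.set(bytes.getBytes(), 0, bytes.getLength());
VerifyFast.verifyDeserializeRead(deserializeRead, typeInfos[0], new IntWritable(42));
VerifyFast.verifyDeserializeRead(deserializeRead, typeInfos[1], new Text("hello"));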
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class TestBinarySortableFast method testBinarySortableFastCase.
private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
SerdeRandomRowSource source = new SerdeRandomRowSource();
source.init(r);
int rowCount = 1000;
Object[][] rows = source.randomRows(rowCount);
if (doNonRandomFill) {
MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
}
// We need to operate on sorted data to fully test BinarySortable.
source.sort(rows);
StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
int columnCount = primitiveTypeInfos.length;
int writeColumnCount = columnCount;
StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
boolean doWriteFewerColumns = r.nextBoolean();
if (doWriteFewerColumns) {
writeColumnCount = 1 + r.nextInt(columnCount);
if (writeColumnCount == columnCount) {
doWriteFewerColumns = false;
} else {
writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
}
}
String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
String order = StringUtils.leftPad("", columnCount, '+');
String nullOrder = StringUtils.leftPad("", columnCount, 'a');
AbstractSerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
AbstractSerDe serde_ascending_fewer = null;
if (doWriteFewerColumns) {
String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
}
order = StringUtils.leftPad("", columnCount, '-');
nullOrder = StringUtils.leftPad("", columnCount, 'z');
AbstractSerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
AbstractSerDe serde_descending_fewer = null;
if (doWriteFewerColumns) {
String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
}
boolean[] columnSortOrderIsDesc = new boolean[columnCount];
Arrays.fill(columnSortOrderIsDesc, false);
byte[] columnNullMarker = new byte[columnCount];
Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
byte[] columnNotNullMarker = new byte[columnCount];
Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
/*
 * Ascending.
 */
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ true, primitiveTypeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ true, primitiveTypeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
if (doWriteFewerColumns) {
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ true, primitiveTypeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ true, primitiveTypeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
}
/*
* Descending.
*/
Arrays.fill(columnSortOrderIsDesc, true);
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_descending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ false, primitiveTypeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_descending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector, /* ascending */ false, primitiveTypeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
if (doWriteFewerColumns) {
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector, /* ascending */ false, primitiveTypeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector, /* ascending */ false, primitiveTypeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
}
}
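The PrimitiveTypeInfo[] above comes from SerdeRandomRowSource. When the column types are fixed rather than random, the same array can be built directly from Hive type names; a small sketch, assuming TypeInfoUtils and TypeInfoFactory from org.apache.hadoop.hive.serde2.typeinfo, with illustrative type names only.
// Illustrative column types; any primitive Hive type names work the same way.
String[] typeNames = { "int", "string", "double" };
PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[typeNames.length];
for (int i = 0; i < typeNames.length; i++) {
  // For primitive type names, getTypeInfoFromTypeString returns a PrimitiveTypeInfo.
  primitiveTypeInfos[i] = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeNames[i]);
}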
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class TestAvroObjectInspectorGenerator method canHandleBytes.
// Avro considers bytes primitive; Hive maps Avro bytes to its BINARY primitive type.
@Test
public void canHandleBytes() throws SerDeException {
Schema s = AvroSerdeUtils.getSchemaFor(BYTES_SCHEMA);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
// Column names
assertEquals(1, aoig.getColumnNames().size());
assertEquals("bytesField", aoig.getColumnNames().get(0));
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
assertTrue(typeInfo instanceof PrimitiveTypeInfo);
assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
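BYTES_SCHEMA is a constant defined elsewhere in this test class. A plausible stand-in for it (an assumption, not the actual constant) and the resulting PrimitiveTypeInfo check:
// Hypothetical stand-in for BYTES_SCHEMA: a record with a single Avro "bytes" field.
String bytesSchemaJson = "{\"type\":\"record\",\"name\":\"testBytes\","
    + "\"fields\":[{\"name\":\"bytesField\",\"type\":\"bytes\"}]}";
Schema s = AvroSerdeUtils.getSchemaFor(bytesSchemaJson);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
PrimitiveTypeInfo pti = (PrimitiveTypeInfo) aoig.getColumnTypes().get(0);
// Avro bytes surfaces as Hive's BINARY primitive category.
assertEquals(PrimitiveCategory.BINARY, pti.getPrimitiveCategory());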
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class TestAvroObjectInspectorGenerator method canHandleFixed.
// Hive has no separate concept of Avro's fixed type; fixed is mapped to Hive's BINARY type.
@Test
public void canHandleFixed() throws SerDeException {
Schema s = AvroSerdeUtils.getSchemaFor(FIXED_SCHEMA);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
// Column names
assertEquals(1, aoig.getColumnNames().size());
assertEquals("hash", aoig.getColumnNames().get(0));
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
assertTrue(typeInfo instanceof PrimitiveTypeInfo);
assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class TestAvroObjectInspectorGenerator method convertsNullableTypes.
// Verify that Union[T, NULL] is converted to just T.
@Test
public void convertsNullableTypes() throws SerDeException {
Schema s = AvroSerdeUtils.getSchemaFor(NULLABLE_STRING_SCHEMA);
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
assertEquals(1, aoig.getColumnNames().size());
assertEquals("nullableString", aoig.getColumnNames().get(0));
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
assertTrue(typeInfo instanceof PrimitiveTypeInfo);
PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
// Verify the union has been hidden and just the main type has been returned.
assertEquals(PrimitiveObjectInspector.PrimitiveCategory.STRING, pti.getPrimitiveCategory());
}
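NULLABLE_STRING_SCHEMA is likewise defined elsewhere in the test class. A plausible stand-in (an assumption) showing the union collapsing to its non-null branch:
// Hypothetical stand-in for NULLABLE_STRING_SCHEMA: the field type is the union ["null", "string"].
String nullableStringJson = "{\"type\":\"record\",\"name\":\"nullableStringTest\","
    + "\"fields\":[{\"name\":\"nullableString\",\"type\":[\"null\",\"string\"]}]}";
Schema schema = AvroSerdeUtils.getSchemaFor(nullableStringJson);
PrimitiveTypeInfo stringTypeInfo = (PrimitiveTypeInfo) new AvroObjectInspectorGenerator(schema).getColumnTypes().get(0);
// The [null, T] union is flattened to T's primitive category.
assertEquals(PrimitiveObjectInspector.PrimitiveCategory.STRING, stringTypeInfo.getPrimitiveCategory());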