Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project cdap by caskdata.
The class SimpleMapEqualComparerTest, method testIncompatibleType:
@Test
public void testIncompatibleType() throws SerDeException, IOException {
  // empty maps
  StringTextMapHolder o1 = new StringTextMapHolder();
  StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(StringTextMapHolder.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  LazySimpleSerDe serde = new LazySimpleSerDe();
  Configuration conf = new Configuration();
  Properties tbl = new Properties();
  tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
  tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
  LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
  serde.initialize(conf, tbl);
  ObjectInspector oi2 = serde.getObjectInspector();
  Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
  int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
  Assert.assertEquals(0, rc);
  // maps with equal entries: the comparer still reports a difference,
  // because the two maps' types are incompatible
  o1.mMap.put("42", new Text("The answer to Life, Universe And Everything"));
  o1.mMap.put("1729", new Text("A taxi cab number"));
  o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
  rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
  Assert.assertFalse(0 == rc);
}
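Both tests depend on a serializeAndDeserialize helper that is not shown in this excerpt. Below is a minimal sketch of what such a helper needs to do, assuming it simply round-trips the object through the serde; the real helper in the test class may serialize field-by-field instead.

Object serializeAndDeserialize(Object o, StructObjectInspector oi, LazySimpleSerDe serde,
    LazySerDeParameters serdeParams) throws SerDeException {
  // Serialize with the Java object inspector, then deserialize back into
  // the serde's lazy representation. serdeParams is unused in this
  // simplified sketch but kept to match the call sites above.
  Writable serialized = serde.serialize(o, oi);
  return serde.deserialize(serialized);
}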
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project cdap by caskdata.
The class SimpleMapEqualComparerTest, method testCompatibleType:
@Test
public void testCompatibleType() throws SerDeException, IOException {
  // empty maps
  TextStringMapHolder o1 = new TextStringMapHolder();
  StructObjectInspector oi1 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(TextStringMapHolder.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  LazySimpleSerDe serde = new LazySimpleSerDe();
  Configuration conf = new Configuration();
  Properties tbl = new Properties();
  tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1));
  tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1));
  LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
  serde.initialize(conf, tbl);
  ObjectInspector oi2 = serde.getObjectInspector();
  Object o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
  int rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
  Assert.assertEquals(0, rc);
  // equal maps
  o1.mMap.put(new Text("42"), "The answer to Life, Universe And Everything");
  o1.mMap.put(new Text("1729"), "A taxi cab number");
  o2 = serializeAndDeserialize(o1, oi1, serde, serdeParams);
  rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
  Assert.assertEquals(0, rc);
  // unequal maps
  o1.mMap.put(new Text("1729"), "Hardy-Ramanujan Number");
  rc = ObjectInspectorUtils.compare(o1, oi1, o2, oi2, new SimpleMapEqualComparer());
  Assert.assertFalse(0 == rc);
}
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.
The class TestHiveAccumuloTableOutputFormat, method testWriteMap:
@Test
public void testWriteMap() throws Exception {
  Instance inst = new MockInstance(test.getMethodName());
  Connector conn = inst.getConnector("root", new PasswordToken(""));
  HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
  String table = test.getMethodName();
  conn.tableOperations().create(table);
  JobConf conf = new JobConf();
  conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
  conf.set(AccumuloConnectionParameters.USER_NAME, "root");
  conf.set(AccumuloConnectionParameters.USER_PASS, "");
  conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
  conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName());
  FileSystem local = FileSystem.getLocal(conf);
  outputFormat.checkOutputSpecs(local, conf);
  RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);
  List<String> names = Arrays.asList("row", "col1");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:*");
  tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(),
      tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams,
      accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL,
      accumuloSerDeParams.getRowIdFactory());
  TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(
      stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(),
      serDeParams.getEscapeChar());
  LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI,
      stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(),
      serDeParams.getEscapeChar());
  LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory
      .getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), Arrays.asList(stringOI, mapOI),
          (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(),
          serDeParams.isEscaped(), serDeParams.getEscapeChar());
  LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);
  ByteArrayRef bytes = new ByteArrayRef();
  bytes.setData("row cq1:value1,cq2:value2".getBytes());
  struct.init(bytes, 0, bytes.getData().length);
  // Serialize the struct into a mutation
  Mutation m = serializer.serialize(struct, structOI);
  // Write the mutation
  recordWriter.write(new Text(table), m);
  // Close the writer
  recordWriter.close(null);
  Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
  Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
  Entry<Key, Value> entry = iter.next();
  Key k = entry.getKey();
  Value v = entry.getValue();
  Assert.assertEquals("row", k.getRow().toString());
  Assert.assertEquals("cf", k.getColumnFamily().toString());
  Assert.assertEquals("cq1", k.getColumnQualifier().toString());
  Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
  Assert.assertEquals("value1", new String(v.get()));
  Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
  entry = iter.next();
  k = entry.getKey();
  v = entry.getValue();
  Assert.assertEquals("row", k.getRow().toString());
  Assert.assertEquals("cf", k.getColumnFamily().toString());
  Assert.assertEquals("cq2", k.getColumnQualifier().toString());
  Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
  Assert.assertEquals("value2", new String(v.get()));
  Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}
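Under the ":rowID,cf:*" column mapping, the first struct field becomes the Accumulo row ID, and each entry of the map column becomes one key/value pair in family "cf" with the map key as the column qualifier. A hand-built equivalent of the Mutation the serializer produces for the input line "row cq1:value1,cq2:value2" would look like this (illustrative sketch; the visibility label is omitted for brevity):

Mutation expected = new Mutation("row");
// One Accumulo entry per map entry: family "cf", qualifier = map key
expected.put(new Text("cf"), new Text("cq1"), new Value("value1".getBytes()));
expected.put(new Text("cf"), new Text("cq2"), new Value("value2".getBytes()));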
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.
The class MultiDelimitSerDe, method initialize:
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // get the SerDe parameters
  super.initialize(conf, tbl);
  serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());
  fieldDelimited = tbl.getProperty(serdeConstants.FIELD_DELIM);
  if (fieldDelimited == null || fieldDelimited.isEmpty()) {
    throw new SerDeException("This table does not have serde property \"field.delim\"!");
  }
  // get the collection separator and map key separator
  // TODO: use serdeConstants.COLLECTION_DELIM when the typo is fixed
  collSep = LazyUtils.getByte(tbl.getProperty(COLLECTION_DELIM), DEFAULT_SEPARATORS[1]);
  keySep = LazyUtils.getByte(tbl.getProperty(serdeConstants.MAPKEY_DELIM), DEFAULT_SEPARATORS[2]);
  serdeParams.setSeparator(1, collSep);
  serdeParams.setSeparator(2, keySep);
  // Create the ObjectInspectors for the fields
  cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(),
      serdeParams.getColumnTypes(), serdeParams.getSeparators(), serdeParams.getNullSequence(),
      serdeParams.isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
  cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
  assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size();
  numColumns = serdeParams.getColumnNames().size();
}
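For reference, a minimal sketch of the table properties this initialize method expects; the column names, types, and the multi-character delimiter "[,]" are illustrative, not taken from the snippet above:

Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
tbl.setProperty(serdeConstants.FIELD_DELIM, "[,]"); // multi-character field delimiter
MultiDelimitSerDe serde = new MultiDelimitSerDe();
// Throws SerDeException if the field.delim property is missing or empty.
serde.initialize(new Configuration(), tbl);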
Use of org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters in project hive by apache.
The class TestVectorSerDeRow, method testVectorDeserializeRow:
void testVectorDeserializeRow(Random r, SerializationType serializationType, boolean alternate1,
    boolean alternate2, boolean useExternalBuffer) throws HiveException, IOException, SerDeException {
  String[] emptyScratchTypeNames = new String[0];
  VectorRandomRowSource source = new VectorRandomRowSource();
  source.init(r);
  VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
  batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
  VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
  // junk the destination for the 1st pass
  for (ColumnVector cv : batch.cols) {
    Arrays.fill(cv.isNull, true);
  }
  PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
  int fieldCount = source.typeNames().size();
  DeserializeRead deserializeRead;
  SerializeWrite serializeWrite;
  switch (serializationType) {
    case BINARY_SORTABLE:
      boolean useColumnSortOrderIsDesc = alternate1;
      if (!useColumnSortOrderIsDesc) {
        deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
        serializeWrite = new BinarySortableSerializeWrite(fieldCount);
      } else {
        boolean[] columnSortOrderIsDesc = new boolean[fieldCount];
        for (int i = 0; i < fieldCount; i++) {
          columnSortOrderIsDesc[i] = r.nextBoolean();
        }
        deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(),
            useExternalBuffer, columnSortOrderIsDesc);
        byte[] columnNullMarker = new byte[fieldCount];
        byte[] columnNotNullMarker = new byte[fieldCount];
        for (int i = 0; i < fieldCount; i++) {
          if (columnSortOrderIsDesc[i]) {
            // Descending: null last (the default for descending order)
            columnNullMarker[i] = BinarySortableSerDe.ZERO;
            columnNotNullMarker[i] = BinarySortableSerDe.ONE;
          } else {
            // Ascending: null first (the default for ascending order)
            columnNullMarker[i] = BinarySortableSerDe.ZERO;
            columnNotNullMarker[i] = BinarySortableSerDe.ONE;
          }
        }
        serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker,
            columnNotNullMarker);
      }
      boolean useBinarySortableCharsNeedingEscape = alternate2;
      if (useBinarySortableCharsNeedingEscape) {
        source.addBinarySortableAlphabets();
      }
      break;
    case LAZY_BINARY:
      deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
      serializeWrite = new LazyBinarySerializeWrite(fieldCount);
      break;
    case LAZY_SIMPLE:
      {
        StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
        Configuration conf = new Configuration();
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.FIELD_DELIM, "\t");
        tbl.setProperty(serdeConstants.LINE_DELIM, "\n");
        byte separator = (byte) '\t';
        boolean useLazySimpleEscapes = alternate1;
        if (useLazySimpleEscapes) {
          tbl.setProperty(serdeConstants.QUOTE_CHAR, "'");
          String escapeString = "\\";
          tbl.setProperty(serdeConstants.ESCAPE_CHAR, escapeString);
        }
        LazySerDeParameters lazySerDeParams = getSerDeParams(conf, tbl, rowObjectInspector);
        if (useLazySimpleEscapes) {
          // LazySimple seems to throw away everything but \n and \r.
          boolean[] needsEscape = lazySerDeParams.getNeedsEscape();
          StringBuilder sb = new StringBuilder();
          if (needsEscape['\n']) {
            sb.append('\n');
          }
          if (needsEscape['\r']) {
            sb.append('\r');
          }
          // for (int i = 0; i < needsEscape.length; i++) {
          //   if (needsEscape[i]) {
          //     sb.append((char) i);
          //   }
          // }
          String needsEscapeStr = sb.toString();
          if (needsEscapeStr.length() > 0) {
            source.addEscapables(needsEscapeStr);
          }
        }
        deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer,
            separator, lazySerDeParams);
        serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
      }
      break;
    default:
      throw new Error("Unknown serialization type " + serializationType);
  }
  VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead);
  vectorDeserializeRow.init();
  // junk the destination for the 1st pass
  for (ColumnVector cv : batch.cols) {
    Arrays.fill(cv.isNull, true);
    cv.noNulls = false;
  }
  VectorExtractRow vectorExtractRow = new VectorExtractRow();
  vectorExtractRow.init(source.typeNames());
  Object[][] randomRows = source.randomRows(100000);
  int firstRandomRowIndex = 0;
  for (int i = 0; i < randomRows.length; i++) {
    Object[] row = randomRows[i];
    Output output = serializeRow(row, source, serializeWrite);
    vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength());
    try {
      vectorDeserializeRow.deserialize(batch, batch.size);
    } catch (Exception e) {
      throw new HiveException("\nDeserializeRead details: "
          + vectorDeserializeRow.getDetailedReadPositionString(), e);
    }
    batch.size++;
    if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
      examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
      firstRandomRowIndex = i + 1;
      batch.reset();
    }
  }
  if (batch.size > 0) {
    examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
  }
}
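The serializeRow and examineBatch helpers are not part of this excerpt. A plausible sketch of serializeRow, assuming each field is written with the chosen SerializeWrite implementation; only two primitive types are shown here, whereas the real helper dispatches on every supported type:

private Output serializeRow(Object[] row, VectorRandomRowSource source, SerializeWrite serializeWrite)
    throws IOException {
  Output output = new Output();
  serializeWrite.set(output);
  PrimitiveTypeInfo[] typeInfos = source.primitiveTypeInfos();
  for (int i = 0; i < row.length; i++) {
    Object value = row[i];
    if (value == null) {
      serializeWrite.writeNull();
      continue;
    }
    switch (typeInfos[i].getPrimitiveCategory()) {
      case LONG:
        serializeWrite.writeLong(((LongWritable) value).get());
        break;
      case STRING:
        Text text = (Text) value;
        serializeWrite.writeString(text.getBytes(), 0, text.getLength());
        break;
      default:
        throw new UnsupportedOperationException("Type not handled in this sketch");
    }
  }
  return output;
}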