Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.
The class RecordReaderImpl, method nextMap.
static Map<Object, Object> nextMap(ColumnVector vector, int row, TypeDescription schema, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    MapColumnVector map = (MapColumnVector) vector;
    int length = (int) map.lengths[row];
    int offset = (int) map.offsets[row];
    TypeDescription keyType = schema.getChildren().get(0);
    TypeDescription valueType = schema.getChildren().get(1);
    LinkedHashMap<Object, Object> result;
    if (previous == null || previous.getClass() != LinkedHashMap.class) {
      result = new LinkedHashMap<Object, Object>(length);
    } else {
      result = (LinkedHashMap<Object, Object>) previous;
      // I couldn't think of a good way to reuse the keys and value objects
      // without even more allocations, so take the easy and safe approach.
      result.clear();
    }
    for (int e = 0; e < length; ++e) {
      result.put(nextValue(map.keys, e + offset, keyType, null),
          nextValue(map.values, e + offset, valueType, null));
    }
    return result;
  } else {
    return null;
  }
}
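Taken together, nextMap illustrates the MapColumnVector layout: offsets[row] and lengths[row] select a contiguous slice of the flat keys and values child vectors. Below is a minimal, self-contained sketch of that layout, assuming long keys and values; the classes and fields are the real storage-api ones, but the demo itself is illustrative rather than code taken from Hive.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class MapVectorLayoutDemo {
  public static void main(String[] args) {
    LongColumnVector keys = new LongColumnVector(4);
    LongColumnVector values = new LongColumnVector(4);
    MapColumnVector map = new MapColumnVector(2, keys, values);
    // Row 0 holds {1=10, 2=20}; row 1 holds {3=30}.
    keys.vector[0] = 1; values.vector[0] = 10;
    keys.vector[1] = 2; values.vector[1] = 20;
    keys.vector[2] = 3; values.vector[2] = 30;
    map.offsets[0] = 0; map.lengths[0] = 2;
    map.offsets[1] = 2; map.lengths[1] = 1;
    map.childCount = 3;
    // Each row's entries live in [offset, offset + length) of the child vectors.
    for (int row = 0; row < 2; row++) {
      int offset = (int) map.offsets[row];
      int length = (int) map.lengths[row];
      for (int e = 0; e < length; e++) {
        System.out.println("row " + row + ": " + keys.vector[offset + e] + " -> " + values.vector[offset + e]);
      }
    }
  }
}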
Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.
The class TestVectorizedMapColumnReader, method testMapRead.
private void testMapRead(boolean isDictionaryEncoding, String type, int elementNum) throws Exception {
  Configuration conf = new Configuration();
  setTypeConfiguration(type, conf);
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader = createTestParquetReader(getSchema(type), conf);
  VectorizedRowBatch previous = reader.createValue();
  int row = 0;
  int index = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      MapColumnVector mapVector = (MapColumnVector) previous.cols[0];
      // isRepeating is only set when the offsets array has length 1.
      assertEquals((mapVector.offsets.length == 1), mapVector.isRepeating);
      for (int i = 0; i < mapVector.offsets.length; i++) {
        if (row == elementNum) {
          assertEquals(i, mapVector.offsets.length - 1);
          break;
        }
        long start = mapVector.offsets[i];
        long length = mapVector.lengths[i];
        boolean isNull = isNull(row);
        if (isNull) {
          assertEquals(mapVector.isNull[i], true);
        } else {
          for (long j = 0; j < length; j++) {
            assertValue(type, mapVector.keys, isDictionaryEncoding, index, (int) (start + j));
            assertValue(type, mapVector.values, isDictionaryEncoding, index, (int) (start + j));
            index++;
          }
        }
        row++;
      }
    }
    assertEquals("It doesn't exit at the expected position", elementNum, row);
  } finally {
    reader.close();
  }
}
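The assertion on isRepeating encodes the batch contract the reader follows: when every row of the column is identical, only entry 0 of offsets, lengths, and isNull is populated. Here is a small sketch of the redirect any consumer of such a batch has to apply, assuming the same storage-api class (the helper name is illustrative):

import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public final class MapVectorAccess {
  // For a repeating vector only entry 0 is valid, so per-row reads must
  // redirect to it instead of indexing by the logical row.
  static long mapLengthAt(MapColumnVector mapVector, int row) {
    int r = mapVector.isRepeating ? 0 : row;
    return mapVector.lengths[r];
  }
}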
Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.
The class TestVectorizedMapColumnReader, method testRepeateMapRead.
private void testRepeateMapRead(int elementNum, boolean isNull) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "map_int32_for_repeat_test");
  conf.set(IOConstants.COLUMNS_TYPES, "map<int,int>");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  String schema = "message hive_schema {\n"
      + "  repeated group map_int32_for_repeat_test (MAP_KEY_VALUE) {\n"
      + "    required int32 key;\n"
      + "    optional int32 value;\n"
      + "  }\n"
      + "}\n";
  VectorizedParquetRecordReader reader = createTestParquetReader(schema, conf);
  VectorizedRowBatch previous = reader.createValue();
  int row = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      MapColumnVector mapVector = (MapColumnVector) previous.cols[0];
      assertTrue(mapVector.isRepeating);
      assertEquals(isNull, mapVector.isNull[0]);
      for (int i = 0; i < mapVector.offsets.length; i++) {
        if (row == elementNum) {
          assertEquals(i, mapVector.offsets.length - 1);
          break;
        }
        row++;
      }
    }
    assertEquals("It doesn't exit at the expected position", elementNum, row);
  } finally {
    reader.close();
  }
}
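The all-null variant of the same contract is what this test pins down: isRepeating combined with isNull[0] means every logical row is a null map. A one-method sketch of that check, using the same storage-api class (the class and method names are illustrative):

import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public final class RepeatingMapChecks {
  // True when the vector encodes "every row is a null map": the column is
  // collapsed to entry 0 and that entry is marked null.
  static boolean isAllNullMap(MapColumnVector mapVector) {
    return mapVector.isRepeating && !mapVector.noNulls && mapVector.isNull[0];
  }
}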
Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.
The class VectorUDFMapIndexBaseCol, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {

  // Return immediately if the batch is empty.
  final int n = batch.size;
  if (n == 0) {
    return;
  }

  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }

  ColumnVector outV = batch.cols[outputColumnNum];
  MapColumnVector mapV = (MapColumnVector) batch.cols[inputColumnNum[0]];
  // indexColumnVector holds the keys to look up in the map.
  indexColumnVector = batch.cols[inputColumnNum[1]];
  ColumnVector valuesV = mapV.values;

  int[] sel = batch.selected;
  boolean[] indexIsNull = indexColumnVector.isNull;
  boolean[] mapIsNull = mapV.isNull;
  boolean[] outputIsNull = outV.isNull;

  // We do not need to do a column reset since we are carefully changing the output.
  outV.isRepeating = false;

  if (indexColumnVector.isRepeating) {

    /*
     * Repeated index or repeated NULL index.
     */
    if (indexColumnVector.noNulls || !indexIsNull[0]) {

      /*
       * Same INDEX for the entire batch.
       */
      if (mapV.isRepeating) {
        if (mapV.noNulls || !mapIsNull[0]) {
          final int repeatedMapIndex = findInMap(indexColumnVector, 0, mapV, 0);
          if (repeatedMapIndex == -1) {
            outV.isNull[0] = true;
            outV.noNulls = false;
          } else {
            outV.isNull[0] = false;
            outV.setElement(0, repeatedMapIndex, valuesV);
          }
        } else {
          outputIsNull[0] = true;
          outV.noNulls = false;
        }
        outV.isRepeating = true;
        return;
      }

      /*
       * Individual row processing for the MAP vector with a *repeated* INDEX value.
       */
      if (mapV.noNulls) {
        if (batch.selectedInUse) {
          if (!outV.noNulls) {
            for (int j = 0; j < n; j++) {
              final int i = sel[j];
              final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            }
          } else {
            for (int j = 0; j < n; j++) {
              final int i = sel[j];
              final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.setElement(i, mapIndex, valuesV);
              }
            }
          }
        } else {
          if (!outV.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outV.noNulls = true;
          }
          for (int i = 0; i < n; i++) {
            final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.setElement(i, mapIndex, valuesV);
            }
          }
        }
      } else /* there are NULLs in the MAP vector */ {
        if (batch.selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (!mapIsNull[i]) {
              final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            } else {
              outputIsNull[i] = true;
              outV.noNulls = false;
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (!mapIsNull[i]) {
              final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            } else {
              outputIsNull[i] = true;
              outV.noNulls = false;
            }
          }
        }
      }
    } else {
      outputIsNull[0] = true;
      outV.noNulls = false;
      outV.isRepeating = true;
    }
    return;
  }

  if (mapV.isRepeating) {
    if (mapV.noNulls || !mapIsNull[0]) {
      if (indexColumnVector.noNulls) {
        if (batch.selectedInUse) {
          if (!outV.noNulls) {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            }
          } else {
            for (int j = 0; j != n; j++) {
              final int i = sel[j];
              final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.setElement(i, mapIndex, valuesV);
              }
            }
          }
        } else {
          if (!outV.noNulls) {
            // Assume it is almost always a performance win to fill all of isNull so we can
            // safely reset noNulls.
            Arrays.fill(outputIsNull, false);
            outV.noNulls = true;
          }
          for (int i = 0; i != n; i++) {
            final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.setElement(i, mapIndex, valuesV);
            }
          }
        }
      } else /* there are NULLs in the index vector */ {
        if (batch.selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (!indexIsNull[i]) {
              final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            } else {
              outputIsNull[i] = true;
              outV.noNulls = false;
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (!indexIsNull[i]) {
              final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
              if (mapIndex == -1) {
                outV.isNull[i] = true;
                outV.noNulls = false;
              } else {
                outV.isNull[i] = false;
                outV.setElement(i, mapIndex, valuesV);
              }
            } else {
              outputIsNull[i] = true;
              outV.noNulls = false;
            }
          }
        }
      }
    } else {
      outputIsNull[0] = true;
      outV.noNulls = false;
      outV.isRepeating = true;
    }
    return;
  }

  // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  /*
   * Individual row processing for INDEX vectors and MAP vectors.
   */
  final boolean mapNoNulls = mapV.noNulls;
  if (indexColumnVector.noNulls) {
    if (batch.selectedInUse) {
      if (!outV.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          if (mapNoNulls || !mapIsNull[i]) {
            final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.isNull[i] = false;
              outV.setElement(i, mapIndex, valuesV);
            }
          } else {
            outputIsNull[i] = true;
            outV.noNulls = false;
          }
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          if (mapNoNulls || !mapIsNull[i]) {
            final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.setElement(i, mapIndex, valuesV);
            }
          } else {
            outputIsNull[i] = true;
            outV.noNulls = false;
          }
        }
      }
    } else {
      if (!outV.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outV.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        if (mapNoNulls || !mapIsNull[i]) {
          final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
          if (mapIndex == -1) {
            outV.isNull[i] = true;
            outV.noNulls = false;
          } else {
            outV.setElement(i, mapIndex, valuesV);
          }
        } else {
          outputIsNull[i] = true;
          outV.noNulls = false;
        }
      }
    }
  } else /* there are NULLs in the index vector */ {
    if (batch.selectedInUse) {
      for (int j = 0; j != n; j++) {
        int i = sel[j];
        if (!indexIsNull[i]) {
          if (mapNoNulls || !mapIsNull[i]) {
            final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.isNull[i] = false;
              outV.setElement(i, mapIndex, valuesV);
            }
          } else {
            outputIsNull[i] = true;
            outV.noNulls = false;
          }
        } else {
          outputIsNull[i] = true;
          outV.noNulls = false;
        }
      }
    } else {
      for (int i = 0; i != n; i++) {
        if (!indexIsNull[i]) {
          if (mapNoNulls || !mapIsNull[i]) {
            final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
            if (mapIndex == -1) {
              outV.isNull[i] = true;
              outV.noNulls = false;
            } else {
              outV.isNull[i] = false;
              outV.setElement(i, mapIndex, valuesV);
            }
          } else {
            outputIsNull[i] = true;
            outV.noNulls = false;
          }
        } else {
          outputIsNull[i] = true;
          outV.noNulls = false;
        }
      }
    }
  }
}
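Every branch above funnels into findInMap, which concrete subclasses implement per key type; it returns the position of the matching key inside the map's slice of the keys child vector, or -1 when the key is absent. A hypothetical sketch specialized to long keys, assuming both the index column and the map keys are LongColumnVectors (findLongKey is an illustrative name, not the Hive method):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public final class MapKeyLookup {
  // Scan row mapRow's [offset, offset + length) slice of the keys child
  // vector for the probe key taken from indexRow; return the child index
  // of the match, or -1 so the caller can mark the output row null.
  static int findLongKey(LongColumnVector indexVector, int indexRow, MapColumnVector mapV, int mapRow) {
    long key = indexVector.vector[indexVector.isRepeating ? 0 : indexRow];
    LongColumnVector keys = (LongColumnVector) mapV.keys;
    int offset = (int) mapV.offsets[mapRow];
    int length = (int) mapV.lengths[mapRow];
    for (int i = offset; i < offset + length; i++) {
      if (keys.vector[i] == key) {
        return i;
      }
    }
    return -1;
  }
}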
Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project flink by apache.
The class OrcBulkRowDataWriterTest, method getResults.
private static List<RowData> getResults(Reader reader) throws IOException {
    List<RowData> results = new ArrayList<>();
    RecordReader recordReader = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    try {
        while (recordReader.nextBatch(batch)) {
            BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
            LongColumnVector intVector = (LongColumnVector) batch.cols[1];
            ListColumnVector listVector = (ListColumnVector) batch.cols[2];
            MapColumnVector mapVector = (MapColumnVector) batch.cols[3];
            for (int r = 0; r < batch.size; r++) {
                GenericRowData readRowData = new GenericRowData(4);
                readRowData.setField(0, readStringData(stringVector, r));
                readRowData.setField(1, readInt(intVector, r));
                readRowData.setField(2, readList(listVector, r));
                readRowData.setField(3, readMap(mapVector, r));
                results.add(readRowData);
            }
        }
    } finally {
        // Close once after all batches have been consumed.
        recordReader.close();
    }
    return results;
}
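The readMap helper invoked above belongs to the test class and converts one vector row into the map representation the test compares against. As a rough, hypothetical sketch of what such a conversion involves, assuming an ORC map with long keys and values (the real helper depends on the schema the test writes):

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public final class MapColumnReaders {
  // Collect the row's [offset, offset + length) slice of the key/value
  // child vectors into a Java map, honoring isRepeating and null rows.
  static Map<Long, Long> readLongMap(MapColumnVector mapVector, int row) {
    int r = mapVector.isRepeating ? 0 : row;
    if (!mapVector.noNulls && mapVector.isNull[r]) {
      return null;
    }
    LongColumnVector keys = (LongColumnVector) mapVector.keys;
    LongColumnVector values = (LongColumnVector) mapVector.values;
    Map<Long, Long> result = new LinkedHashMap<>();
    int offset = (int) mapVector.offsets[r];
    int length = (int) mapVector.lengths[r];
    for (int e = 0; e < length; e++) {
      result.put(keys.vector[offset + e], values.vector[offset + e]);
    }
    return result;
  }
}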