Search in sources :

Example 1 with MutableColumnarRow

Use of org.apache.spark.sql.execution.vectorized.MutableColumnarRow in project boostkit-bigdata by kunpengcompute.

The method transPageToColumnar of the class PageToColumnar.

/**
 * Converts a sequence of column-vector arrays into Spark row or batch objects.
 *
 * <p>Each array yielded by the iterator is treated as one batch of columns. The
 * number of rows in a batch is taken from {@code getElementsAppended()} of its
 * first column vector. Batches that are {@code null}, contain no column vectors,
 * or report no appended elements are skipped.
 *
 * @param writableColumnVectors iterator over per-batch arrays of column vectors;
 *                              consumed completely by this call
 * @param isVectorizedReader    when {@code true}, one {@link ColumnarBatch} is
 *                              emitted per batch; when {@code false}, one
 *                              {@link MutableColumnarRow} is emitted per row
 * @return a new list holding the produced {@code ColumnarBatch} or
 *         {@code MutableColumnarRow} objects, in iteration order; never {@code null}
 */
public List<Object> transPageToColumnar(Iterator<WritableColumnVector[]> writableColumnVectors, boolean isVectorizedReader) {
    List<Object> internalRowList = new ArrayList<>();
    while (writableColumnVectors.hasNext()) {
        WritableColumnVector[] columnVector = writableColumnVectors.next();
        // A batch without any column has no first vector to read the row count
        // from, so it is skipped just like a missing (null) batch.
        if (columnVector == null || columnVector.length == 0) {
            continue;
        }
        int positionCount = columnVector[0].getElementsAppended();
        if (positionCount <= 0) {
            continue;
        }
        if (isVectorizedReader) {
            ColumnarBatch columnarBatch = new ColumnarBatch(columnVector);
            columnarBatch.setNumRows(positionCount);
            internalRowList.add(columnarBatch);
        } else {
            // Every row gets its own view object because all of them are retained
            // in the result list; the views share the same underlying vectors and
            // differ only in rowId.
            for (int rowId = 0; rowId < positionCount; rowId++) {
                MutableColumnarRow mutableColumnarRow = new MutableColumnarRow(columnVector);
                mutableColumnarRow.rowId = rowId;
                internalRowList.add(mutableColumnarRow);
            }
        }
    }
    return internalRowList;
}
Also used : ColumnarBatch(org.apache.spark.sql.vectorized.ColumnarBatch) ArrayList(java.util.ArrayList) MutableColumnarRow(org.apache.spark.sql.execution.vectorized.MutableColumnarRow) StructField(org.apache.spark.sql.types.StructField) WritableColumnVector(org.apache.spark.sql.execution.vectorized.WritableColumnVector) DataType(org.apache.spark.sql.types.DataType)

Aggregations

ArrayList (java.util.ArrayList)1 MutableColumnarRow (org.apache.spark.sql.execution.vectorized.MutableColumnarRow)1 WritableColumnVector (org.apache.spark.sql.execution.vectorized.WritableColumnVector)1 DataType (org.apache.spark.sql.types.DataType)1 StructField (org.apache.spark.sql.types.StructField)1 ColumnarBatch (org.apache.spark.sql.vectorized.ColumnarBatch)1