Use of org.apache.carbondata.hadoop.CarbonInputSplit in project carbondata by apache.
The class PaginationCarbonReader, method getRows.
private Object[] getRows(long fromRowNumber, long toRowNumber)
    throws IOException, InterruptedException {
  int rowCount = 0;
  Object[] rows = new Object[(int) (toRowNumber - fromRowNumber + 1)];
  // get the matching split index (blocklets) range for the input range.
  Range blockletIndexRange = getBlockletIndexRange(fromRowNumber, toRowNumber);
  for (int i = blockletIndexRange.getFrom(); i <= blockletIndexRange.getTo(); i++) {
    String blockletUniqueId = String.valueOf(i);
    BlockletRows blockletRows;
    if (cache.get(blockletUniqueId) != null) {
      blockletRows = (BlockletRows) cache.get(blockletUniqueId);
    } else {
      BlockletDetailInfo detailInfo =
          ((CarbonInputSplit) allBlockletSplits.get(i)).getDetailInfo();
      List<Object> rowsInBlocklet = new ArrayList<>();
      // read the rows from the blocklet
      // TODO: read blocklets in multi-thread if there is a performance requirement.
      readerBuilder.setInputSplit(allBlockletSplits.get(i));
      CarbonReader<Object> carbonReader = readerBuilder.build();
      while (carbonReader.hasNext()) {
        rowsInBlocklet.add(carbonReader.readNextRow());
      }
      carbonReader.close();
      long fromRowId;
      if (i == 0) {
        fromRowId = 1;
      } else {
        // the previous index holds the sum of rows up to the previous blocklet.
        fromRowId = rowCountInSplits.get(i - 1) + 1;
      }
      blockletRows =
          new BlockletRows(fromRowId, detailInfo.getBlockSize(), rowsInBlocklet.toArray());
      // add entry to cache with no expiry time
      // key: unique blocklet id
      // value: BlockletRows
      cache.put(String.valueOf(i), blockletRows, blockletRows.getMemorySize(), Integer.MAX_VALUE);
    }
    long fromBlockletRow = blockletRows.getRowIdStartIndex();
    long toBlockletRow = fromBlockletRow + blockletRows.getRowsCount();
    Object[] rowsInBlocklet = blockletRows.getRows();
    if (toRowNumber >= toBlockletRow) {
      if (fromRowNumber >= fromBlockletRow) {
        // only fromRowNumber lies in this blocklet,
        // read from fromRowNumber to the end of the blocklet.
        // row numbers are 1-based and array offsets are 0-based, so subtracting
        // the blocklet's start row id gives the 0-based offset.
        int start = (int) (fromRowNumber - blockletRows.getRowIdStartIndex());
        int end = blockletRows.getRowsCount();
        while (start < end) {
          rows[rowCount++] = rowsInBlocklet[start++];
        }
      } else {
        // neither fromRowNumber nor toRowNumber lies in this blocklet,
        // read the whole blocklet.
        System.arraycopy(rowsInBlocklet, 0, rows, rowCount, rowsInBlocklet.length);
        rowCount += rowsInBlocklet.length;
      }
    } else {
      if (fromRowNumber >= fromBlockletRow) {
        // both fromRowNumber and toRowNumber lie in this blocklet itself,
        // prune it and fill the results.
        int start = (int) (fromRowNumber - blockletRows.getRowIdStartIndex());
        int end = (int) (start + (toRowNumber + 1 - fromRowNumber));
        while (start < end) {
          rows[rowCount++] = rowsInBlocklet[start++];
        }
      } else {
        // only toRowNumber lies in this blocklet,
        // read from the start of the blocklet to toRowNumber.
        int start = 0;
        int end = (int) (toRowNumber + 1 - blockletRows.getRowIdStartIndex());
        while (start < end) {
          rows[rowCount++] = rowsInBlocklet[start++];
        }
      }
    }
  }
  return rows;
}
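The helper getBlockletIndexRange used above is not part of this snippet; it has to map a 1-based row range onto the blocklet indexes that cover it, using the cumulative row counts kept in rowCountInSplits. The following is a minimal, self-contained sketch of that kind of lookup; the class and method names are hypothetical and this is not the actual CarbonData implementation.

import java.util.Arrays;
import java.util.List;

// Hypothetical helper: maps a 1-based row range onto blocklet indexes using a
// prefix-sum list (cumulativeRows.get(i) = total rows up to and including
// blocklet i), mirroring how getRows derives fromRowId above.
public final class BlockletRangeLookup {

  /** Smallest blocklet index whose cumulative row count reaches rowNumber. */
  static int blockletOf(List<Long> cumulativeRows, long rowNumber) {
    int low = 0;
    int high = cumulativeRows.size() - 1;
    while (low < high) {
      int mid = (low + high) >>> 1;
      if (cumulativeRows.get(mid) < rowNumber) {
        low = mid + 1;
      } else {
        high = mid;
      }
    }
    return low;
  }

  /** Returns {fromBlocklet, toBlocklet} covering [fromRowNumber, toRowNumber]. */
  static int[] findBlockletRange(List<Long> cumulativeRows, long fromRowNumber, long toRowNumber) {
    return new int[] {
        blockletOf(cumulativeRows, fromRowNumber),
        blockletOf(cumulativeRows, toRowNumber)
    };
  }

  public static void main(String[] args) {
    // Three blocklets holding 100, 50 and 200 rows -> cumulative counts 100, 150, 350.
    List<Long> cumulativeRows = Arrays.asList(100L, 150L, 350L);
    // Rows 90..160 start in blocklet 0 and end in blocklet 2, so this prints [0, 2].
    System.out.println(Arrays.toString(findBlockletRange(cumulativeRows, 90, 160)));
  }
}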
Use of org.apache.carbondata.hadoop.CarbonInputSplit in project carbondata by apache.
The class CarbonStreamRecordReaderTest, method buildInputSplit.
private InputSplit buildInputSplit() throws IOException {
  CarbonInputSplit carbonInputSplit = new CarbonInputSplit();
  List<CarbonInputSplit> splitList = new ArrayList<>();
  splitList.add(carbonInputSplit);
  return new CarbonMultiBlockSplit(splitList, new String[] { "localhost" }, FileFormat.ROW_V1);
}
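buildInputSplit above wraps a single empty split for the streaming record reader test. In the read path, a CarbonMultiBlockSplit typically bundles several splits destined for one task; the sketch below groups splits per host using only the constructor shown above. The bundling method and its host-to-splits map are assumptions (the real planner also balances by size and locality), and the import locations for CarbonMultiBlockSplit and FileFormat are taken from the project layout and may differ.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.carbondata.core.statusmanager.FileFormat;
import org.apache.carbondata.hadoop.CarbonInputSplit;
import org.apache.carbondata.hadoop.CarbonMultiBlockSplit;

// Sketch only: bundles the splits assigned to each host into one
// CarbonMultiBlockSplit, using the same constructor as buildInputSplit above.
// The host-to-splits assignment is assumed to come from the caller.
public final class MultiBlockSplitBundler {

  static List<CarbonMultiBlockSplit> bundlePerHost(
      Map<String, List<CarbonInputSplit>> splitsByHost) {
    List<CarbonMultiBlockSplit> bundled = new ArrayList<>();
    for (Map.Entry<String, List<CarbonInputSplit>> entry : splitsByHost.entrySet()) {
      bundled.add(new CarbonMultiBlockSplit(
          entry.getValue(),                 // all splits assigned to this host
          new String[] { entry.getKey() },  // preferred location
          FileFormat.ROW_V1));              // streaming row format, as in the test above
    }
    return bundled;
  }
}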
Use of org.apache.carbondata.hadoop.CarbonInputSplit in project carbondata by apache.
The class ExtendedBlocklet, method deserializeFields.
/**
 * Method to deserialize extended blocklet and input split for index server
 * @param in data input stream to read the primitives of extended blocklet
 * @param locations locations of the input split
 * @param tablePath carbon table path
 * @param isCountJob true if only the row count and segment number are serialized
 * @param cdcVO CDC info holding the columns whose min/max values are serialized
 * @throws IOException if reading from the stream fails
 */
public void deserializeFields(DataInput in, String[] locations, String tablePath,
    boolean isCountJob, CdcVO cdcVO) throws IOException {
  super.readFields(in);
  if (isCountJob) {
    count = in.readLong();
    segmentNo = in.readUTF();
    return;
  } else if (cdcVO != null) {
    filePath = in.readUTF();
    this.columnToMinMaxMapping = new HashMap<>();
    for (String column : cdcVO.getColumnToIndexMap().keySet()) {
      List<FilePathMinMaxVO> minMaxOfColumnInList = new ArrayList<>();
      int minLength = in.readInt();
      byte[] minValuesForBlocklets = new byte[minLength];
      in.readFully(minValuesForBlocklets);
      int maxLength = in.readInt();
      byte[] maxValuesForBlocklets = new byte[maxLength];
      in.readFully(maxValuesForBlocklets);
      minMaxOfColumnInList.add(
          new FilePathMinMaxVO(filePath, minValuesForBlocklets, maxValuesForBlocklets));
      this.columnToMinMaxMapping.put(column, minMaxOfColumnInList);
    }
    return;
  }
  if (in.readBoolean()) {
    indexUniqueId = in.readUTF();
  }
  boolean isSplitPresent = in.readBoolean();
  if (isSplitPresent) {
    String filePath = getPath();
    boolean isExternalPath = in.readBoolean();
    if (!isExternalPath) {
      setFilePath(tablePath + filePath);
    } else {
      setFilePath(filePath);
    }
    // getting the length of the data
    final int serializeLen = in.readInt();
    this.inputSplit =
        new CarbonInputSplit(serializeLen, in, getFilePath(), locations, getBlockletId());
  }
}
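deserializeFields consumes bytes in exactly the order a matching writer on the index server emitted them: a boolean presence flag before each optional UTF field and an int length prefix before each byte[] that is then read with readFully. The following is a small, self-contained sketch of that presence-flag and length-prefix pattern using only java.io; the field names are illustrative and not the actual ExtendedBlocklet fields.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the wire pattern deserializeFields expects: presence flag before an
// optional UTF field, length prefix before a byte[] payload.
public final class OptionalFieldProtocolDemo {

  static void write(DataOutput out, String indexUniqueId, byte[] payload) throws IOException {
    out.writeBoolean(indexUniqueId != null);   // presence flag, read back with readBoolean()
    if (indexUniqueId != null) {
      out.writeUTF(indexUniqueId);             // read back with readUTF()
    }
    out.writeInt(payload.length);              // length prefix, read back with readInt()
    out.write(payload);                        // read back with readFully()
  }

  static void read(DataInput in) throws IOException {
    if (in.readBoolean()) {
      System.out.println("indexUniqueId = " + in.readUTF());
    }
    byte[] payload = new byte[in.readInt()];
    in.readFully(payload);
    System.out.println("payload bytes  = " + payload.length);
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    write(new DataOutputStream(buffer), "index-1", new byte[] { 1, 2, 3 });
    read(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
  }
}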