Use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.
The class CarbonDictionaryMetadataReaderImpl, method read().
/**
 * This method will be used to read the complete metadata file.
 * Applicable scenarios:
 * 1. Query execution. Whenever a query is executed, the dictionary metadata has to be
 *    read first in order to read the dictionary file and define the query scope.
 * 2. If the dictionary file is read using a start and end offset, this meta list can be
 *    used to count the total number of dictionary chunks present between the two offsets.
 *
 * @return list of all dictionary meta chunks, which contains information for each segment
 * @throws IOException if an I/O error occurs
 */
@Override
public List<CarbonDictionaryColumnMetaChunk> read() throws IOException {
  List<CarbonDictionaryColumnMetaChunk> dictionaryMetaChunks =
      new ArrayList<CarbonDictionaryColumnMetaChunk>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  CarbonDictionaryColumnMetaChunk columnMetaChunk = null;
  ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
  // open dictionary meta thrift reader
  openThriftReader();
  // read every dictionary chunk meta entry till the end of the metadata file
  while (dictionaryMetadataFileReader.hasNext()) {
    // get the thrift object for dictionary chunk
    dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
    // create a new instance of chunk meta wrapper using thrift object
    columnMetaChunk = getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
    dictionaryMetaChunks.add(columnMetaChunk);
  }
  return dictionaryMetaChunks;
}
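Scenario 2 in the javadoc (counting chunks between two offsets) can be illustrated with a short sketch. The helper name countChunksBetween is hypothetical and not part of the CarbonData API, and it assumes CarbonDictionaryColumnMetaChunk exposes getStart_offset()/getEnd_offset() accessors for the offsets carried by each meta entry:

// Hypothetical helper: count dictionary chunks whose byte range falls within [startOffset, endOffset].
// Assumes getStart_offset()/getEnd_offset() accessors exist on the wrapper class.
private static int countChunksBetween(List<CarbonDictionaryColumnMetaChunk> metaChunks,
    long startOffset, long endOffset) {
  int count = 0;
  for (CarbonDictionaryColumnMetaChunk chunk : metaChunks) {
    if (chunk.getStart_offset() >= startOffset && chunk.getEnd_offset() <= endOffset) {
      count++;
    }
  }
  return count;
}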
Use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.
The class CarbonDictionaryWriterImplTest, method testTruncateOperation().
/**
* This method will test the truncate functionality
*/
@Test
public void testTruncateOperation() throws IOException {
  // delete store path
  deleteStorePath();
  // prepare dictionary writer object and write the first dictionary chunk
  CarbonDictionaryWriterImpl writer = prepareWriter();
  writeDictionaryFile(writer, dataSet1);
  // record the end offset of the first dictionary chunk
  long endOffsetAfterFirstDictionaryChunk = CarbonUtil.getFileSize(dictionaryFilePath);
  // write the second dictionary chunk
  writer = prepareWriter();
  writeDictionaryFile(writer, dataSet2);
  // prepare a column meta chunk object whose end offset covers only the first chunk
  ColumnDictionaryChunkMeta firstDictionaryChunkMeta =
      new ColumnDictionaryChunkMeta(1, 2, 0, endOffsetAfterFirstDictionaryChunk, 1);
  // overwrite the dictionary meta chunk file to test the truncate operation
  overwriteDictionaryMetaFile(firstDictionaryChunkMeta, dictionaryMetaFilePath);
  writer = prepareWriter();
  // in the next step the truncate operation will be exercised while writing the dictionary file
  writeDictionaryFile(writer, dataSet3);
  // read dictionary file
  List<byte[]> dictionaryValues = readDictionaryFile(0L, 0L);
  List<String> actual = convertByteArrayListToStringValueList(dictionaryValues);
  List<String> expected = new ArrayList<>(4);
  expected.addAll(dataSet1);
  expected.addAll(dataSet3);
  // validate that the retrieved data matches dataSet1 followed by dataSet3
  // (dataSet2 should have been truncated away)
  compareDictionaryData(actual, expected);
}
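The positional arguments of the ColumnDictionaryChunkMeta constructor used above correspond to the fields written by writeDictionaryMetadataFile further down: min surrogate key, max surrogate key, chunk start offset, chunk end offset, and chunk count. So the overwritten meta entry claims the dictionary file ends at endOffsetAfterFirstDictionaryChunk, which is what forces the truncate on the next write. A minimal restatement with named locals makes the intent explicit; the variable names are illustrative only:

// Illustrative only: the same constructor call as in the test, with the positional
// arguments spelled out (order inferred from writeDictionaryMetadataFile below).
int minSurrogateKey = 1;
int maxSurrogateKey = 2;
long chunkStartOffset = 0L;
long chunkEndOffset = endOffsetAfterFirstDictionaryChunk;
int chunkCount = 1;
ColumnDictionaryChunkMeta firstDictionaryChunkMeta = new ColumnDictionaryChunkMeta(
    minSurrogateKey, maxSurrogateKey, chunkStartOffset, chunkEndOffset, chunkCount);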
Use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.
The class ThriftReaderTest, method testReadForException().
@Test(expected = java.io.IOException.class)
public void testReadForException() throws IOException {
  ThriftReader.TBaseCreator tBaseCreator = new ThriftReader.TBaseCreator() {
    @Override
    public TBase create() {
      return new ColumnDictionaryChunkMeta();
    }
  };
  new MockUp<ColumnDictionaryChunkMeta>() {
    @Mock
    public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException {
      throw new TException("TException Occur");
    }
  };
  thriftReader = new ThriftReader("TestFile.carbon", tBaseCreator);
  thriftReader.read();
}
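The test mocks the thrift deserialization of ColumnDictionaryChunkMeta to throw a TException and expects ThriftReader.read() to surface it as an IOException. A minimal sketch of that wrapping pattern is shown below; it is an assumption about the general shape of ThriftReader.read(), not the actual CarbonData implementation, and the field names tBaseCreator and binaryIn are placeholders:

// Sketch only: how a TException thrown during thrift deserialization would
// typically be rethrown as an IOException, which is what the test asserts.
public TBase read() throws IOException {
  TBase t = tBaseCreator.create();   // e.g. new ColumnDictionaryChunkMeta()
  try {
    t.read(binaryIn);                // mocked above to throw TException
  } catch (TException e) {
    throw new IOException(e);
  }
  return t;
}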
Use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.
The class CarbonDictionaryWriterImpl, method writeDictionaryMetadataFile().
/**
* This method will write the dictionary metadata file for a given column
*
* @throws IOException if an I/O error occurs
*/
private void writeDictionaryMetadataFile() throws IOException {
  // Format of dictionary metadata file
  // min, max, start offset, end offset and chunk count
  int min_surrogate_key = 0;
  int max_surrogate_key = 0;
  // if a previous segment entry exists, derive min and max from its max surrogate key;
  // when no new records were added, both min and max stay equal to the previous max
  if (null != chunkMetaObjectForLastSegmentEntry) {
    if (0 == totalRecordCount) {
      min_surrogate_key = chunkMetaObjectForLastSegmentEntry.getMax_surrogate_key();
    } else {
      min_surrogate_key = chunkMetaObjectForLastSegmentEntry.getMax_surrogate_key() + 1;
    }
    max_surrogate_key = chunkMetaObjectForLastSegmentEntry.getMax_surrogate_key() + totalRecordCount;
  } else {
    if (totalRecordCount > 0) {
      min_surrogate_key = 1;
    }
    max_surrogate_key = totalRecordCount;
  }
  ColumnDictionaryChunkMeta dictionaryChunkMeta = new ColumnDictionaryChunkMeta(
      min_surrogate_key, max_surrogate_key, chunk_start_offset, chunk_end_offset, chunk_count);
  try {
    openThriftWriter(this.dictionaryMetaFilePath);
    // write dictionary metadata file
    writeThriftObject(dictionaryChunkMeta);
    LOGGER.info("Dictionary metadata file written successfully for column "
        + this.dictionaryColumnUniqueIdentifier.getColumnIdentifier()
        + " at path " + this.dictionaryMetaFilePath);
  } finally {
    closeThriftWriter();
  }
}
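The surrogate key branching above is easiest to see with worked numbers. The helper below is hypothetical (not part of CarbonData) and only mirrors the min/max computation; the three cases in the trailing comments use illustrative values:

// Hypothetical helper mirroring the branch logic above; previousMax is null when
// no earlier segment entry exists.
static int[] computeMinMax(Integer previousMax, int totalRecordCount) {
  int min = 0, max = 0;
  if (previousMax != null) {
    min = (totalRecordCount == 0) ? previousMax : previousMax + 1;
    max = previousMax + totalRecordCount;
  } else {
    if (totalRecordCount > 0) {
      min = 1;
    }
    max = totalRecordCount;
  }
  return new int[] { min, max };
}
// computeMinMax(null, 5)  -> {1, 5}    first write, no previous entry
// computeMinMax(5, 10)    -> {6, 15}   incremental write
// computeMinMax(15, 0)    -> {15, 15}  incremental load with no new dictionary values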
Use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.
The class CarbonDictionaryMetadataReaderImpl, method readLastEntryOfDictionaryMetaChunk().
/**
 * This method will be used to read only the last entry of the dictionary meta chunk.
 * Applicable scenarios:
 * 1. Global dictionary generation for an incremental load. In this case only the
 *    last dictionary chunk meta entry has to be read to calculate the min and max
 *    surrogate keys and the start and end offsets for the new dictionary chunk.
 * 2. Truncate operation. While writing the dictionary file during an incremental load,
 *    the dictionary file needs to be validated for any inconsistency. Here the end offset
 *    of the last dictionary chunk meta is validated against the file size.
 *
 * @return last segment entry for dictionary chunk
 * @throws IOException if an I/O error occurs
 */
@Override
public CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk() throws IOException {
  ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
  // open dictionary meta thrift reader
  openThriftReader();
  // at the completion of the while loop we will have the last dictionary chunk entry
  while (dictionaryMetadataFileReader.hasNext()) {
    // get the thrift object for dictionary chunk
    dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
  }
  // create a new instance of chunk meta wrapper using thrift object
  return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
}
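Scenario 2 of the javadoc (truncate validation) can be sketched as follows. The names metadataReader and truncateFile are hypothetical, and getEnd_offset() is an assumed accessor on the meta chunk wrapper; the sketch only shows how the last entry's end offset would be compared against the current dictionary file size before an incremental write:

// Hypothetical sketch of the truncate check performed before an incremental write.
private void validateDictionaryFileOffsetWithLastSegmentEntry() throws IOException {
  CarbonDictionaryColumnMetaChunk lastEntry =
      metadataReader.readLastEntryOfDictionaryMetaChunk();
  long currentFileSize = CarbonUtil.getFileSize(dictionaryFilePath);
  if (currentFileSize > lastEntry.getEnd_offset()) {
    // the dictionary file contains bytes beyond the last committed chunk,
    // so truncate it back to the last known-good end offset
    truncateFile(dictionaryFilePath, lastEntry.getEnd_offset());
  }
}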