Search in sources :

Example 1 with ColumnDictionaryChunkMeta

use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.

the class CarbonDictionaryMetadataReaderImpl method read.

/**
 * Reads the complete dictionary metadata file and returns one wrapper per entry.
 * Applicable scenarios:
 * 1. Query execution. Before the dictionary file is read for a query, the dictionary
 * metadata is read first so that the query scope can be defined.
 * 2. When the dictionary file is read using a start and an end offset, the returned list
 * can be used to count the dictionary chunks present between the two offsets.
 *
 * @return list of all dictionary meta chunks, in the order they are read from the file
 * @throws IOException if an I/O error occurs
 */
@Override
public List<CarbonDictionaryColumnMetaChunk> read() throws IOException {
    List<CarbonDictionaryColumnMetaChunk> metaChunks = new ArrayList<CarbonDictionaryColumnMetaChunk>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // open dictionary meta thrift reader
    openThriftReader();
    // consume entries until the reader reports that none are left
    while (dictionaryMetadataFileReader.hasNext()) {
        // each entry is a thrift object that gets wrapped before it is collected
        ColumnDictionaryChunkMeta thriftChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
        metaChunks.add(getNewInstanceOfCarbonDictionaryColumnMetaChunk(thriftChunkMeta));
    }
    return metaChunks;
}
Also used : ColumnDictionaryChunkMeta(org.apache.carbondata.format.ColumnDictionaryChunkMeta) ArrayList(java.util.ArrayList)

Example 2 with ColumnDictionaryChunkMeta

use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.

the class CarbonDictionaryWriterImplTest method testTruncateOperation.

/**
 * Tests the truncate functionality: after the metadata file is overwritten with an
 * entry that ends at the first chunk, a further write is expected to leave only the
 * first and the newly written data sets in the dictionary file.
 */
@Test
public void testTruncateOperation() throws IOException {
    // start from an empty store
    deleteStorePath();
    // write the first dictionary chunk and remember the file size right after it
    writeDictionaryFile(prepareWriter(), dataSet1);
    long firstChunkEndOffset = CarbonUtil.getFileSize(dictionaryFilePath);
    // write a second chunk with a fresh writer
    writeDictionaryFile(prepareWriter(), dataSet2);
    // build a meta entry whose end offset is the end of the first chunk
    ColumnDictionaryChunkMeta firstChunkMeta = new ColumnDictionaryChunkMeta(1, 2, 0, firstChunkEndOffset, 1);
    // overwrite the dictionary meta chunk file to test the truncate operation
    overwriteDictionaryMetaFile(firstChunkMeta, dictionaryMetaFilePath);
    // the truncate operation is exercised while writing this third data set
    writeDictionaryFile(prepareWriter(), dataSet3);
    // read the dictionary file back and convert it to string values
    List<String> actual = convertByteArrayListToStringValueList(readDictionaryFile(0L, 0L));
    List<String> expected = new ArrayList<>(4);
    expected.addAll(dataSet1);
    expected.addAll(dataSet3);
    // the data retrieved should be dataSet1 followed by dataSet3
    compareDictionaryData(actual, expected);
}
Also used : ColumnDictionaryChunkMeta(org.apache.carbondata.format.ColumnDictionaryChunkMeta) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 3 with ColumnDictionaryChunkMeta

use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.

the class ThriftReaderTest method testReadForException.

/**
 * Expects an {@link java.io.IOException} from {@code thriftReader.read()} when the
 * mocked {@code ColumnDictionaryChunkMeta.read} throws a {@code TException}.
 */
@Test(expected = java.io.IOException.class)
public void testReadForException() throws IOException {
    // replace ColumnDictionaryChunkMeta.read so that it always fails
    new MockUp<ColumnDictionaryChunkMeta>() {

        @Mock
        public void read(org.apache.thrift.protocol.TProtocol iprot) throws TException {
            throw new TException("TException Occur");
        }
    };
    // creator that supplies the thrift object the reader will populate
    ThriftReader.TBaseCreator chunkMetaCreator = new ThriftReader.TBaseCreator() {

        @Override
        public TBase create() {
            return new ColumnDictionaryChunkMeta();
        }
    };
    thriftReader = new ThriftReader("TestFile.carbon", chunkMetaCreator);
    thriftReader.read();
}
Also used : TException(org.apache.thrift.TException) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) ColumnDictionaryChunkMeta(org.apache.carbondata.format.ColumnDictionaryChunkMeta) MockUp(mockit.MockUp) Test(org.junit.Test)

Example 4 with ColumnDictionaryChunkMeta

use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.

the class CarbonDictionaryWriterImpl method writeDictionaryMetadataFile.

/**
 * Writes one entry to the dictionary metadata file for the current column.
 * The entry holds, in order: min surrogate key, max surrogate key, chunk start
 * offset, chunk end offset and chunk count.
 *
 * @throws IOException if an I/O error occurs
 */
private void writeDictionaryMetadataFile() throws IOException {
    int minSurrogateKey;
    int maxSurrogateKey;
    if (null == chunkMetaObjectForLastSegmentEntry) {
        // no previous entry: keys start at 1, or stay 0 when nothing was written
        minSurrogateKey = (totalRecordCount > 0) ? 1 : 0;
        maxSurrogateKey = totalRecordCount;
    } else {
        int previousMax = chunkMetaObjectForLastSegmentEntry.getMax_surrogate_key();
        // with zero new records both min and max equal the previous max
        minSurrogateKey = (0 == totalRecordCount) ? previousMax : previousMax + 1;
        maxSurrogateKey = previousMax + totalRecordCount;
    }
    ColumnDictionaryChunkMeta chunkMeta = new ColumnDictionaryChunkMeta(minSurrogateKey, maxSurrogateKey, chunk_start_offset, chunk_end_offset, chunk_count);
    try {
        openThriftWriter(this.dictionaryMetaFilePath);
        // write dictionary metadata file
        writeThriftObject(chunkMeta);
        LOGGER.info("Dictionary metadata file written successfully for column " + this.dictionaryColumnUniqueIdentifier.getColumnIdentifier() + " at path " + this.dictionaryMetaFilePath);
    } finally {
        // always release the writer, even when opening or writing failed
        closeThriftWriter();
    }
}
Also used : ColumnDictionaryChunkMeta(org.apache.carbondata.format.ColumnDictionaryChunkMeta)

Example 5 with ColumnDictionaryChunkMeta

use of org.apache.carbondata.format.ColumnDictionaryChunkMeta in project carbondata by apache.

the class CarbonDictionaryMetadataReaderImpl method readLastEntryOfDictionaryMetaChunk.

/**
 * This method will be used to read only the last entry of dictionary meta chunk.
 * Applicable scenarios :
 * 1. Global dictionary generation for incremental load. In this case only the
 * last dictionary chunk meta entry has to be read to calculate min, max surrogate
 * key and start and end offset for the new dictionary chunk.
 * 2. Truncate operation. While writing dictionary file in case of incremental load
 * dictionary file needs to be validated for any inconsistency. Here end offset of last
 * dictionary chunk meta is validated with file size.
 *
 * @return last segment entry for dictionary chunk
 * @throws IOException if an I/O error occurs, or if the metadata file contains no entry
 */
@Override
public CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk() throws IOException {
    ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
    // open dictionary meta thrift reader
    openThriftReader();
    // at the completion of while loop we will get the last dictionary chunk entry
    while (dictionaryMetadataFileReader.hasNext()) {
        // get the thrift object for dictionary chunk
        dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
    }
    // if the loop never ran there is no entry; fail explicitly instead of
    // handing a null thrift object to the wrapper factory
    if (null == dictionaryChunkMeta) {
        throw new IOException("Last dictionary chunk does not exist");
    }
    // create a new instance of chunk meta wrapper using thrift object
    return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
}
Also used : ColumnDictionaryChunkMeta(org.apache.carbondata.format.ColumnDictionaryChunkMeta)

Aggregations

ColumnDictionaryChunkMeta (org.apache.carbondata.format.ColumnDictionaryChunkMeta): 6, ArrayList (java.util.ArrayList): 2, Test (org.junit.Test): 2, MockUp (mockit.MockUp): 1, ThriftReader (org.apache.carbondata.core.reader.ThriftReader): 1, TException (org.apache.thrift.TException): 1