Search in sources :

Example 1 with DictionaryService

use of org.apache.carbondata.core.service.DictionaryService in project carbondata by apache.

the class IncrementalColumnDictionaryGenerator method writeDictionaryData.

/**
 * Writes the dictionary, its sort index and the dictionary metadata for this generator's
 * dimension, then emits an audit log entry with the time spent in each phase.
 *
 * @param tableUniqueName name used to look the table up in {@code CarbonMetadata}
 * @throws IOException declared for failures raised by the dictionary read/write steps
 */
@Override
public void writeDictionaryData(String tableUniqueName) throws IOException {
    // resolve the table, the column and the store location the dictionary belongs to
    CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName);
    CarbonTableIdentifier tableId = table.getCarbonTableIdentifier();
    ColumnIdentifier columnId = dimension.getColumnIdentifier();
    String storeLocation = table.getStorePath();
    DictionaryService service = CarbonCommonFactory.getDictionaryService();
    DictionaryColumnUniqueIdentifier dictionaryKey =
        new DictionaryColumnUniqueIdentifier(tableId, columnId, columnId.getDataType());
    Boolean dictionaryFileExists =
        CarbonUtil.isFileExistsForGivenColumn(storeLocation, dictionaryKey);

    // phase 1: load the existing dictionary through the cache, if a dictionary file exists
    Dictionary existingDictionary = null;
    long cacheStart = System.currentTimeMillis();
    if (dictionaryFileExists) {
        Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseDictionaryCache =
            CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, storeLocation);
        existingDictionary = reverseDictionaryCache.get(dictionaryKey);
    }
    long dictCacheTime = System.currentTimeMillis() - cacheStart;

    // phase 2: write the dictionary and collect the values that were written
    long writeStart = System.currentTimeMillis();
    CarbonDictionaryWriter writer = service.getDictionaryWriter(tableId, columnId, storeLocation);
    List<String> newValues = writeDictionary(writer, dictionaryFileExists);
    long dictWriteTime = System.currentTimeMillis() - writeStart;

    // phase 3: the sort index is only written when there is at least one new value
    long sortStart = System.currentTimeMillis();
    if (!newValues.isEmpty()) {
        writeSortIndex(newValues, existingDictionary, service, tableId, columnId, storeLocation);
    }
    long sortIndexWriteTime = System.currentTimeMillis() - sortStart;

    // phase 4: update the dictionary metadata, then report what was done
    updateMetaData(writer);
    String auditMessage = "\n columnName: " + dimension.getColName()
        + "\n columnId: " + dimension.getColumnId()
        + "\n new distinct values count: " + newValues.size()
        + "\n create dictionary cache: " + dictCacheTime
        + "\n sort list, distinct and write: " + dictWriteTime
        + "\n write sort info: " + sortIndexWriteTime;
    LOGGER.audit(auditMessage);
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) BiDictionary(org.apache.carbondata.core.devapi.BiDictionary) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DictionaryService(org.apache.carbondata.core.service.DictionaryService) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter)

Example 2 with DictionaryService

use of org.apache.carbondata.core.service.DictionaryService in project carbondata by apache.

the class AbstractDictionaryCache method readLastChunkFromDictionaryMetadataFile.

/**
 * Reads the last entry of the dictionary metadata file of the given column.
 * The metadata reader is closed before this method returns, whether or not the read
 * succeeded.
 *
 * @param dictionaryColumnUniqueIdentifier identifies the dictionary column whose metadata
 *                                         file is read
 * @return the last dictionary metadata chunk, as returned by the metadata reader
 * @throws IOException if reading the metadata file or closing the reader fails
 */
protected CarbonDictionaryColumnMetaChunk readLastChunkFromDictionaryMetadataFile(DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) throws IOException {
    CarbonDictionaryMetadataReader metadataReader = CarbonCommonFactory.getDictionaryService()
        .getDictionaryMetadataReader(dictionaryColumnUniqueIdentifier);
    try {
        // the finally clause below still runs before the value is handed to the caller
        return metadataReader.readLastEntryOfDictionaryMetaChunk();
    } finally {
        metadataReader.close();
    }
}
Also used : DictionaryService(org.apache.carbondata.core.service.DictionaryService) CarbonDictionaryColumnMetaChunk(org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk) CarbonDictionaryMetadataReader(org.apache.carbondata.core.reader.CarbonDictionaryMetadataReader)

Example 3 with DictionaryService

use of org.apache.carbondata.core.service.DictionaryService in project carbondata by apache.

the class AbstractDictionaryCache method getNumRecordsInCarbonDictionaryColumnMetaChunk.

/**
 * Reads the dictionary metadata chunk at the given offset and returns its max surrogate key.
 * The metadata reader is closed before the key is extracted from the chunk.
 *
 * @param dictionaryColumnUniqueIdentifier identifies the dictionary column whose metadata
 *                                         file is read
 * @param offsetRead offset passed to the metadata reader to select the chunk
 * @return the max surrogate key recorded in the selected metadata chunk
 * @throws IOException if reading the metadata file or closing the reader fails
 */
protected long getNumRecordsInCarbonDictionaryColumnMetaChunk(DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, long offsetRead) throws IOException {
    CarbonDictionaryMetadataReader metadataReader = CarbonCommonFactory.getDictionaryService()
        .getDictionaryMetadataReader(dictionaryColumnUniqueIdentifier);
    final CarbonDictionaryColumnMetaChunk chunkAtOffset;
    try {
        chunkAtOffset = metadataReader.readEntryOfDictionaryMetaChunk(offsetRead);
    } finally {
        // always release the reader, even when the read throws
        metadataReader.close();
    }
    return chunkAtOffset.getMax_surrogate_key();
}
Also used : DictionaryService(org.apache.carbondata.core.service.DictionaryService) CarbonDictionaryColumnMetaChunk(org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk) CarbonDictionaryMetadataReader(org.apache.carbondata.core.reader.CarbonDictionaryMetadataReader)

Example 4 with DictionaryService

use of org.apache.carbondata.core.service.DictionaryService in project carbondata by apache.

the class IncrementalColumnDictionaryGenerator method writeDictionaryData.

/**
 * Writes the dictionary, its sort index and the dictionary metadata for this generator's
 * dimension, then emits an audit log entry with the time spent in each phase.
 *
 * <p>A dictionary obtained from the cache is handed back through
 * {@code CarbonUtil.clearDictionaryCache} in a {@code finally} block, so that call is made
 * even when one of the write steps throws.
 *
 * @throws IOException declared for failures raised by the dictionary read/write steps
 */
@Override
public void writeDictionaryData() throws IOException {
    // initialize params
    AbsoluteTableIdentifier absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier();
    ColumnIdentifier columnIdentifier = dimension.getColumnIdentifier();
    DictionaryService dictionaryService = CarbonCommonFactory.getDictionaryService();
    DictionaryColumnUniqueIdentifier identifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
    boolean isDictExists = CarbonUtil.isFileExistsForGivenColumn(identifier);
    Dictionary dictionary = null;
    try {
        // create dictionary cache from dictionary File
        long t1 = System.currentTimeMillis();
        if (isDictExists) {
            Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
            dictionary = dictCache.get(identifier);
        }
        long dictCacheTime = System.currentTimeMillis() - t1;
        long t2 = System.currentTimeMillis();
        // write dictionary
        CarbonDictionaryWriter dictionaryWriter = dictionaryService.getDictionaryWriter(identifier);
        List<String> distinctValues = writeDictionary(dictionaryWriter, isDictExists);
        long dictWriteTime = System.currentTimeMillis() - t2;
        long t3 = System.currentTimeMillis();
        // write sort index, only needed when new values were added
        if (!distinctValues.isEmpty()) {
            writeSortIndex(distinctValues, dictionary, dictionaryService, absoluteTableIdentifier, columnIdentifier);
        }
        long sortIndexWriteTime = System.currentTimeMillis() - t3;
        // update Meta Data
        updateMetaData(dictionaryWriter);
        LOGGER.audit("\n columnName: " + dimension.getColName() + "\n columnId: " + dimension.getColumnId() + "\n new distinct values count: " + distinctValues.size() + "\n create dictionary cache: " + dictCacheTime + "\n sort list, distinct and write: " + dictWriteTime + "\n write sort info: " + sortIndexWriteTime);
    } finally {
        // Previously this call was skipped whenever a step above threw. The dictionary is
        // only non-null when it was fetched from the cache, so a failed lookup is not cleared.
        // NOTE(review): clearDictionaryCache presumably releases this caller's hold on the
        // cached entry - confirm against CarbonUtil.
        if (dictionary != null) {
            CarbonUtil.clearDictionaryCache(dictionary);
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) BiDictionary(org.apache.carbondata.core.devapi.BiDictionary) DictionaryService(org.apache.carbondata.core.service.DictionaryService) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter)

Aggregations

DictionaryService (org.apache.carbondata.core.service.DictionaryService): 4, Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary): 2, DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier): 2, BiDictionary (org.apache.carbondata.core.devapi.BiDictionary): 2, ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier): 2, CarbonDictionaryColumnMetaChunk (org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk): 2, CarbonDictionaryMetadataReader (org.apache.carbondata.core.reader.CarbonDictionaryMetadataReader): 2, CarbonDictionaryWriter (org.apache.carbondata.core.writer.CarbonDictionaryWriter): 2, AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 1, CarbonMetadata (org.apache.carbondata.core.metadata.CarbonMetadata): 1, CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier): 1, CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 1