Search in sources :

Example 16 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class IncrementalColumnDictionaryGenerator method writeSortIndex.

/**
 * Prepares the sort information for the supplied distinct values and writes both the
 * sort index and the inverted sort index for the column.
 *
 * @param distinctValues          values handed to the sort info preparator
 * @param dictionary              dictionary handed to the sort info preparator
 * @param dictionaryService       service used to obtain the sort index writer
 * @param absoluteTableIdentifier table part of the dictionary column identifier
 * @param columnIdentifier        column part of the dictionary column identifier
 * @throws IOException if preparing or writing the sort index fails
 */
private void writeSortIndex(List<String> distinctValues, Dictionary dictionary, DictionaryService dictionaryService, AbsoluteTableIdentifier absoluteTableIdentifier, ColumnIdentifier columnIdentifier) throws IOException {
    DictionaryColumnUniqueIdentifier uniqueIdentifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
    CarbonDictionarySortIndexWriter sortIndexWriter = null;
    try {
        CarbonDictionarySortInfo sortInfo = new CarbonDictionarySortInfoPreparator().getDictionarySortInfo(distinctValues, dictionary, dimension.getDataType());
        // the writer is requested only after the sort info has been prepared successfully
        sortIndexWriter = dictionaryService.getDictionarySortIndexWriter(uniqueIdentifier);
        sortIndexWriter.writeSortIndex(sortInfo.getSortIndex());
        sortIndexWriter.writeInvertedSortIndex(sortInfo.getSortIndexInverted());
    } finally {
        // close the writer whenever it was obtained, even if a write failed
        if (sortIndexWriter != null) {
            sortIndexWriter.close();
        }
    }
}
Also used : DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter)

Example 17 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class IncrementalColumnDictionaryGenerator method writeDictionaryData.

/**
 * Writes the dictionary for this generator's dimension, then the sort index (only when
 * new distinct values were written), updates the dictionary metadata and logs the time
 * spent in each phase.
 *
 * <p>When a dictionary file already exists for the column, the dictionary is fetched from
 * the reverse dictionary cache first and is released again via
 * {@code CarbonUtil.clearDictionaryCache} when this method exits, whether it completes
 * normally or with an exception.
 *
 * @throws IOException if reading the cached dictionary or writing any of the files fails
 */
@Override
public void writeDictionaryData() throws IOException {
    // initialize params
    AbsoluteTableIdentifier absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier();
    ColumnIdentifier columnIdentifier = dimension.getColumnIdentifier();
    DictionaryService dictionaryService = CarbonCommonFactory.getDictionaryService();
    // create dictionary cache from dictionary File
    DictionaryColumnUniqueIdentifier identifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
    boolean isDictExists = CarbonUtil.isFileExistsForGivenColumn(identifier);
    Dictionary dictionary = null;
    try {
        long t1 = System.currentTimeMillis();
        if (isDictExists) {
            Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
            dictionary = dictCache.get(identifier);
        }
        long dictCacheTime = System.currentTimeMillis() - t1;
        long t2 = System.currentTimeMillis();
        // write dictionary
        CarbonDictionaryWriter dictionaryWriter = dictionaryService.getDictionaryWriter(identifier);
        List<String> distinctValues = writeDictionary(dictionaryWriter, isDictExists);
        long dictWriteTime = System.currentTimeMillis() - t2;
        long t3 = System.currentTimeMillis();
        // write sort index
        if (!distinctValues.isEmpty()) {
            writeSortIndex(distinctValues, dictionary, dictionaryService, absoluteTableIdentifier, columnIdentifier);
        }
        long sortIndexWriteTime = System.currentTimeMillis() - t3;
        // update Meta Data
        updateMetaData(dictionaryWriter);
        LOGGER.audit("\n columnName: " + dimension.getColName() + "\n columnId: " + dimension.getColumnId() + "\n new distinct values count: " + distinctValues.size() + "\n create dictionary cache: " + dictCacheTime + "\n sort list, distinct and write: " + dictWriteTime + "\n write sort info: " + sortIndexWriteTime);
    } finally {
        // release the cached dictionary even when one of the write steps above threw;
        // dictionary is only non-null when it was actually fetched from the cache
        if (dictionary != null) {
            CarbonUtil.clearDictionaryCache(dictionary);
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) BiDictionary(org.apache.carbondata.core.devapi.BiDictionary) DictionaryService(org.apache.carbondata.core.service.DictionaryService) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter)

Example 18 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class StoreCreator method writeDictionary.

/**
 * Builds dictionary files and sort index files for every dimension of the table from a
 * comma-separated fact file.
 *
 * <p>The fact file is read once to collect the distinct values per dimension column
 * (column {@code i} of each line belongs to dimension {@code i}); the reader is closed as
 * soon as reading finishes or fails. For each dimension the distinct values are then
 * written through a dictionary writer, the dictionary is loaded from the reverse
 * dictionary cache, and its sort index and inverted sort index are written.
 *
 * @param factFilePath path of the UTF-8 encoded, comma-separated fact file
 * @param table        table whose dimensions get a dictionary
 * @throws Exception if reading the fact file or writing any dictionary file fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(factFilePath), "UTF-8"));
    try {
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        // the file is no longer needed once the distinct values are collected
        reader.close();
    }
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier, columnIdentifier.getDataType());
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // close the writer even if a write failed; commit below only runs on success
            writer.close();
        }
        writer.commit();
        // NOTE(review): this lookup key is built from absoluteTableIdentifier and the dimension's
        // data type, unlike the identifier used for the writers above - confirm this is intended
        Dictionary dict = dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) Set(java.util.Set) HashSet(java.util.HashSet) InputStreamReader(java.io.InputStreamReader) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) FileInputStream(java.io.FileInputStream) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) BufferedReader(java.io.BufferedReader) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) HashSet(java.util.HashSet) Cache(org.apache.carbondata.core.cache.Cache)

Example 19 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class CarbonDictionaryDecodeReadSupport method initialize.

/**
 * Records the data type of every column and, for columns that are dictionary encoded
 * (but neither direct-dictionary encoded nor complex), loads the column's dictionary
 * from the forward dictionary cache.
 *
 * @param carbonColumns columns to prepare
 * @param carbonTable   table supplying the absolute table identifier and the
 *                      dictionary path table property
 * @throws IOException if a dictionary cannot be loaded from the cache
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable) throws IOException {
    this.carbonColumns = carbonColumns;
    dictionaries = new Dictionary[carbonColumns.length];
    dataTypes = new DataType[carbonColumns.length];
    for (int idx = 0; idx < carbonColumns.length; idx++) {
        CarbonColumn column = carbonColumns[idx];
        if (!column.hasEncoding(Encoding.DICTIONARY) || column.hasEncoding(Encoding.DIRECT_DICTIONARY) || column.isComplex()) {
            // no dictionary lookup for this column; only its data type is recorded
            dataTypes[idx] = column.getDataType();
            continue;
        }
        Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache = CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
        dataTypes[idx] = column.getDataType();
        String dictionaryPath = carbonTable.getTableInfo().getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH);
        dictionaries[idx] = forwardCache.get(new DictionaryColumnUniqueIdentifier(carbonTable.getAbsoluteTableIdentifier(), column.getColumnIdentifier(), dataTypes[idx], dictionaryPath));
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CacheProvider(org.apache.carbondata.core.cache.CacheProvider)

Example 20 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class DictionaryDecodeReadSupport method initialize.

/**
 * Stores the given columns, captures each column's data type, and fetches a dictionary
 * from the forward dictionary cache for every column that has dictionary encoding
 * without direct-dictionary encoding and is not complex.
 *
 * @param carbonColumns columns to prepare
 * @param carbonTable   table providing the absolute table identifier and the
 *                      dictionary path table property
 * @throws IOException if fetching a dictionary from the cache fails
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable) throws IOException {
    this.carbonColumns = carbonColumns;
    dictionaries = new Dictionary[carbonColumns.length];
    dataTypes = new DataType[carbonColumns.length];
    int position = 0;
    while (position < carbonColumns.length) {
        CarbonColumn current = carbonColumns[position];
        boolean usesDictionary = current.hasEncoding(Encoding.DICTIONARY) && !current.hasEncoding(Encoding.DIRECT_DICTIONARY) && !current.isComplex();
        if (usesDictionary) {
            CacheProvider provider = CacheProvider.getInstance();
            Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache = provider.createCache(CacheType.FORWARD_DICTIONARY);
            dataTypes[position] = current.getDataType();
            String dictionaryPath = carbonTable.getTableInfo().getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH);
            DictionaryColumnUniqueIdentifier uniqueIdentifier = new DictionaryColumnUniqueIdentifier(carbonTable.getAbsoluteTableIdentifier(), current.getColumnIdentifier(), dataTypes[position], dictionaryPath);
            dictionaries[position] = cache.get(uniqueIdentifier);
        } else {
            // columns without a loadable dictionary only get their data type recorded
            dataTypes[position] = current.getDataType();
        }
        position++;
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CacheProvider(org.apache.carbondata.core.cache.CacheProvider)

Aggregations

DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier)22 Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary)14 ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)11 CacheProvider (org.apache.carbondata.core.cache.CacheProvider)7 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)6 CarbonDictionaryWriter (org.apache.carbondata.core.writer.CarbonDictionaryWriter)6 ArrayList (java.util.ArrayList)5 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)5 CarbonDictionaryWriterImpl (org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl)5 CarbonDictionarySortIndexWriter (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter)5 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 CarbonDictionarySortIndexWriterImpl (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl)4 CarbonDictionarySortInfo (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo)4 CarbonDictionarySortInfoPreparator (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator)4 BufferedReader (java.io.BufferedReader)3 HashSet (java.util.HashSet)3 Set (java.util.Set)3 Cache (org.apache.carbondata.core.cache.Cache)3 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)3 FileReader (java.io.FileReader)2