Search in sources :

Example 6 with Dictionary

use of org.apache.carbondata.core.cache.dictionary.Dictionary in project carbondata by apache.

the class QueryUtil method getDictionaryMap.

/**
   * Below method will be used to get the column id to its dictionary mapping
   *
   * @param dictionaryColumnIdList  dictionary column list
   * @param absoluteTableIdentifier absolute table identifier
   * @return dictionary mapping
   * @throws IOException
   */
private static Map<String, Dictionary> getDictionaryMap(List<String> dictionaryColumnIdList, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
    // this for dictionary unique identifier
    List<DictionaryColumnUniqueIdentifier> dictionaryColumnUniqueIdentifiers = getDictionaryColumnUniqueIdentifierList(dictionaryColumnIdList, absoluteTableIdentifier.getCarbonTableIdentifier());
    CacheProvider cacheProvider = CacheProvider.getInstance();
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
    List<Dictionary> columnDictionaryList = forwardDictionaryCache.getAll(dictionaryColumnUniqueIdentifiers);
    Map<String, Dictionary> columnDictionaryMap = new HashMap<>(columnDictionaryList.size());
    for (int i = 0; i < dictionaryColumnUniqueIdentifiers.size(); i++) {
        // TODO: null check for column dictionary, if cache size is less it
        // might return null here, in that case throw exception
        columnDictionaryMap.put(dictionaryColumnIdList.get(i), columnDictionaryList.get(i));
    }
    return columnDictionaryMap;
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) HashMap(java.util.HashMap) CacheProvider(org.apache.carbondata.core.cache.CacheProvider)

Example 7 with Dictionary

use of org.apache.carbondata.core.cache.dictionary.Dictionary in project carbondata by apache.

the class CarbonDictionarySortInfoPreparatorTest method testGetDictionarySortInfo.

/**
   * Tests the getDictionarySortInfo method
   */
@Test
public void testGetDictionarySortInfo() {
    List<String> newDistinctValues = new ArrayList<>();
    newDistinctValues.add("abc");
    newDistinctValues.add("xyz");
    Dictionary dictionary = new MockUp<Dictionary>() {

        @Mock
        public DictionaryChunksWrapper getDictionaryChunks() {
            List<byte[]> data = new ArrayList<>();
            data.add(new byte[] { 1, 2 });
            List<List<byte[]>> dictionaryChunks = new ArrayList<>();
            dictionaryChunks.add(data);
            return new DictionaryChunksWrapper(dictionaryChunks);
        }
    }.getMockInstance();
    new MockUp<DictionaryChunksWrapper>() {

        @Mock
        public int getSize() {
            return 1;
        }
    };
    CarbonDictionarySortInfo carbonDictionarySortInfo = carbonDictionarySortInfoPreparator.getDictionarySortInfo(newDistinctValues, dictionary, DataType.ARRAY);
    int expectedGetSortIndexValue = 1;
    int expectedGetSortInvertedIndexLength = 3;
    int actualGetSortIndexValue = carbonDictionarySortInfo.getSortIndex().get(0);
    int actualGetSortInvertedIndexLength = carbonDictionarySortInfo.getSortIndexInverted().size();
    assertEquals(actualGetSortIndexValue, expectedGetSortIndexValue);
    assertEquals(actualGetSortInvertedIndexLength, expectedGetSortInvertedIndexLength);
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) ArrayList(java.util.ArrayList) DictionaryChunksWrapper(org.apache.carbondata.core.cache.dictionary.DictionaryChunksWrapper) List(java.util.List) ArrayList(java.util.ArrayList) MockUp(mockit.MockUp) Mock(mockit.Mock) Test(org.junit.Test)

Example 8 with Dictionary

use of org.apache.carbondata.core.cache.dictionary.Dictionary in project carbondata by apache.

the class VectorizedCarbonRecordReader method close.

@Override
public void close() throws IOException {
    logStatistics(rowCount, queryModel.getStatisticsRecorder());
    if (columnarBatch != null) {
        columnarBatch.close();
        columnarBatch = null;
    }
    // clear dictionary cache
    Map<String, Dictionary> columnToDictionaryMapping = queryModel.getColumnToDictionaryMapping();
    if (null != columnToDictionaryMapping) {
        for (Map.Entry<String, Dictionary> entry : columnToDictionaryMapping.entrySet()) {
            CarbonUtil.clearDictionaryCache(entry.getValue());
        }
    }
    try {
        queryExecutor.finish();
    } catch (QueryExecutionException e) {
        throw new IOException(e);
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) QueryExecutionException(org.apache.carbondata.core.scan.executor.exception.QueryExecutionException) IOException(java.io.IOException) Map(java.util.Map)

Example 9 with Dictionary

use of org.apache.carbondata.core.cache.dictionary.Dictionary in project carbondata by apache.

the class FilterUtil method getFilterListForAllValues.

/**
   * This method will get all the members of column from the forward dictionary
   * cache, this method will be basically used in row level filter resolver.
   *
   * @param tableIdentifier
   * @param expression
   * @param columnExpression
   * @param isIncludeFilter
   * @return DimColumnFilterInfo
   * @throws FilterUnsupportedException
   * @throws IOException
   */
public static DimColumnFilterInfo getFilterListForAllValues(AbsoluteTableIdentifier tableIdentifier, Expression expression, final ColumnExpression columnExpression, boolean isIncludeFilter) throws IOException, FilterUnsupportedException {
    Dictionary forwardDictionary = null;
    List<String> evaluateResultListFinal = new ArrayList<String>(20);
    DictionaryChunksWrapper dictionaryWrapper = null;
    try {
        forwardDictionary = getForwardDictionaryCache(tableIdentifier, columnExpression.getDimension());
        dictionaryWrapper = forwardDictionary.getDictionaryChunks();
        while (dictionaryWrapper.hasNext()) {
            byte[] columnVal = dictionaryWrapper.next();
            try {
                RowIntf row = new RowImpl();
                String stringValue = new String(columnVal, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
                if (stringValue.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL)) {
                    stringValue = null;
                }
                row.setValues(new Object[] { DataTypeUtil.getDataBasedOnDataType(stringValue, columnExpression.getCarbonColumn().getDataType()) });
                Boolean rslt = expression.evaluate(row).getBoolean();
                if (null != rslt && rslt == isIncludeFilter) {
                    if (null == stringValue) {
                        evaluateResultListFinal.add(CarbonCommonConstants.MEMBER_DEFAULT_VAL);
                    } else {
                        evaluateResultListFinal.add(stringValue);
                    }
                }
            } catch (FilterIllegalMemberException e) {
                LOGGER.debug(e.getMessage());
            }
        }
        return getFilterValues(columnExpression, evaluateResultListFinal, forwardDictionary, isIncludeFilter);
    } finally {
        CarbonUtil.clearDictionaryCache(forwardDictionary);
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) ForwardDictionary(org.apache.carbondata.core.cache.dictionary.ForwardDictionary) RowImpl(org.apache.carbondata.core.scan.filter.intf.RowImpl) ArrayList(java.util.ArrayList) DictionaryChunksWrapper(org.apache.carbondata.core.cache.dictionary.DictionaryChunksWrapper) RowIntf(org.apache.carbondata.core.scan.filter.intf.RowIntf) FilterIllegalMemberException(org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException)

Example 10 with Dictionary

use of org.apache.carbondata.core.cache.dictionary.Dictionary in project carbondata by apache.

the class StoreCreator method writeDictionary.

private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    String header = reader.readLine();
    String[] split = header.split(",");
    List<CarbonColumn> allCols = new ArrayList<CarbonColumn>();
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
    allCols.addAll(dims);
    List<CarbonMeasure> msrs = table.getMeasureByTableName(table.getFactTableName());
    allCols.addAll(msrs);
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    String line = reader.readLine();
    while (line != null) {
        String[] data = line.split(",");
        for (int i = 0; i < set.length; i++) {
            set[i].add(data[i]);
        }
        line = reader.readLine();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
        for (String value : set[i]) {
            writer.write(value);
        }
        writer.close();
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
    reader.close();
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Aggregations

Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary)13 DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier)7 ArrayList (java.util.ArrayList)5 CacheProvider (org.apache.carbondata.core.cache.CacheProvider)3 ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)3 CarbonDictionaryWriter (org.apache.carbondata.core.writer.CarbonDictionaryWriter)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Set (java.util.Set)2 Cache (org.apache.carbondata.core.cache.Cache)2 DictionaryChunksWrapper (org.apache.carbondata.core.cache.dictionary.DictionaryChunksWrapper)2 ForwardDictionary (org.apache.carbondata.core.cache.dictionary.ForwardDictionary)2 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)2 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)2 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)2 QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)2 CarbonDictionaryWriterImpl (org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl)2