Search in sources :

Example 11 with ColumnIdentifier

use of org.apache.carbondata.core.metadata.ColumnIdentifier in project carbondata by apache.

the class ForwardDictionaryCacheTest method writeSortIndexFile.

/**
 * Derives the sort index and the inverted sort index for the given values and
 * writes both through a sort index writer created for the given column.
 *
 * @param data     values to index; each value receives a 1-based surrogate key in list order
 * @param columnId id used to build the column identifier the writer is created for
 * @throws IOException if thrown by the sort index writer
 */
private void writeSortIndexFile(List<String> data, String columnId) throws IOException {
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(columnId, null, null);
    DictionaryColumnUniqueIdentifier uniqueIdentifier = new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier, columnIdentifier.getDataType());

    // Surrogate keys start at 1 and follow list order; a value that occurs
    // more than once keeps the key of its last occurrence.
    Map<String, Integer> surrogateKeyByValue = new HashMap<>(data.size());
    int nextSurrogateKey = 1;
    for (String value : data) {
        surrogateKeyByValue.put(value, nextSurrogateKey++);
    }

    List<String> sortedValues = new ArrayList<>(surrogateKeyByValue.keySet());
    Collections.sort(sortedValues);

    // sortIndex: surrogate keys listed in sorted-value order.
    // sortPositionBySurrogate: slot (surrogateKey - 1) holds the 1-based sorted position.
    List<Integer> sortIndex = new ArrayList<>(data.size());
    int[] sortPositionBySurrogate = new int[sortedValues.size()];
    int sortedPosition = 0;
    for (String value : sortedValues) {
        int surrogateKey = surrogateKeyByValue.get(value);
        sortIndex.add(surrogateKey);
        sortPositionBySurrogate[surrogateKey - 1] = ++sortedPosition;
    }

    List<Integer> invertedSortIndex = new ArrayList<>(data.size());
    for (int position : sortPositionBySurrogate) {
        invertedSortIndex.add(position);
    }

    CarbonDictionarySortIndexWriter sortIndexWriter = new CarbonDictionarySortIndexWriterImpl(uniqueIdentifier);
    try {
        sortIndexWriter.writeSortIndex(sortIndex);
        sortIndexWriter.writeInvertedSortIndex(invertedSortIndex);
    } finally {
        sortIndexWriter.close();
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier)

Example 12 with ColumnIdentifier

use of org.apache.carbondata.core.metadata.ColumnIdentifier in project carbondata by apache.

the class StoreCreator method writeDictionary.

/**
 * Reads the fact file, collects the distinct values of each dimension column and
 * writes, per dimension, a dictionary followed by its sort index and inverted
 * sort index.
 *
 * <p>The first line of the file is read and discarded (treated as a header).
 * Every following line is split on {@code ","}; the value at position {@code i}
 * is added to the distinct-value set of dimension {@code i}.
 *
 * @param factFilePath path of the comma separated fact file to read
 * @param table        table whose dimensions (looked up by its fact table name) get dictionaries
 * @throws Exception if reading the file or writing the dictionary / sort index fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    try {
        // The header line is consumed but not used.
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        // Close the reader even when reading or splitting a line fails.
        reader.close();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
    for (int i = 0; i < set.length; i++) {
        CarbonDimension dimension = dims.get(i);
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dimension.getColumnId(), null, null);
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // Release the writer even when a write fails.
            writer.close();
        }
        // Commit is issued after close; a failed write or close never reaches it.
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dimension.getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // An empty list of new distinct values is passed together with the dictionary just loaded.
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dimension.getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 13 with ColumnIdentifier

use of org.apache.carbondata.core.metadata.ColumnIdentifier in project carbondata by apache.

the class ManageDictionaryAndBTree method removeDictionaryColumnFromCache.

/**
 * Invalidates the dictionary entry of a column in both the reverse and the
 * forward dictionary cache.
 *
 * @param carbonTableIdentifier table identifier used to build the cache key
 * @param storePath             store path passed to the cache provider when obtaining each cache
 * @param columnId              id of the column whose dictionary entry is invalidated
 */
public static void removeDictionaryColumnFromCache(CarbonTableIdentifier carbonTableIdentifier, String storePath, String columnId) {
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseDictionaryCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, storePath);
    // The same key is used for both caches.
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(columnId, null, null);
    DictionaryColumnUniqueIdentifier cacheKey = new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, columnIdentifier);
    reverseDictionaryCache.invalidate(cacheKey);

    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache = CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY, storePath);
    forwardDictionaryCache.invalidate(cacheKey);
}
Also used : ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier)

Example 14 with ColumnIdentifier

use of org.apache.carbondata.core.metadata.ColumnIdentifier in project carbondata by apache.

the class StoreCreator method writeDictionary.

/**
 * Reads the fact file, collects the distinct values of each dimension column and
 * writes, per dimension, a dictionary followed by its sort index and inverted
 * sort index.
 *
 * <p>The first line of the file is read and discarded (treated as a header).
 * Every following line is split on {@code ","}; the value at position {@code i}
 * is added to the distinct-value set of dimension {@code i}.
 *
 * @param factFilePath path of the comma separated fact file to read
 * @param table        table whose dimensions (looked up by its table name) get dictionaries
 * @throws Exception if reading the file or writing the dictionary / sort index fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    try {
        // The header line is consumed but not used.
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        // Close the reader even when reading or splitting a line fails.
        reader.close();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    for (int i = 0; i < set.length; i++) {
        CarbonDimension dimension = dims.get(i);
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dimension.getColumnId(), null, null);
        // One identifier instance is shared by the dictionary writer and the sort index writer.
        DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier, columnIdentifier.getDataType());
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // Release the writer even when a write fails.
            writer.close();
        }
        // Commit is issued after close; a failed write or close never reaches it.
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier, dimension.getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // An empty list of new distinct values is passed together with the dictionary just loaded.
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dimension.getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 15 with ColumnIdentifier

use of org.apache.carbondata.core.metadata.ColumnIdentifier in project carbondata by apache.

the class ManageDictionaryAndBTree method removeDictionaryColumnFromCache.

/**
 * Invalidates the dictionary entry of a column in both the reverse and the
 * forward dictionary cache.
 *
 * @param carbonTableIdentifier table identifier used to build the cache key
 * @param columnId              id of the column whose dictionary entry is invalidated
 */
public static void removeDictionaryColumnFromCache(AbsoluteTableIdentifier carbonTableIdentifier, String columnId) {
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseDictionaryCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    // The same key is used for both caches.
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(columnId, null, null);
    DictionaryColumnUniqueIdentifier cacheKey = new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, columnIdentifier);
    reverseDictionaryCache.invalidate(cacheKey);

    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache = CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
    forwardDictionaryCache.invalidate(cacheKey);
}
Also used : ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier)

Aggregations

ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)22 DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier)11 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)7 CarbonDictionaryWriter (org.apache.carbondata.core.writer.CarbonDictionaryWriter)7 Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary)6 CarbonDictionaryWriterImpl (org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl)6 ArrayList (java.util.ArrayList)5 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)5 CarbonDictionarySortIndexWriter (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter)5 CarbonDictionarySortIndexWriterImpl (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl)5 Test (org.junit.Test)5 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)4 BeforeClass (org.junit.BeforeClass)4 BufferedReader (java.io.BufferedReader)3 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 Set (java.util.Set)3 Cache (org.apache.carbondata.core.cache.Cache)3 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)3 CarbonDictionarySortInfo (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo)3