Search in sources :

Example 1 with IntegerParser

use of com.bakdata.conquery.models.preproc.parser.specific.IntegerParser in project conquery by bakdata.

From the class NumberTypeGuesser, method createGuess.

/**
 * Tries to store the column's distinct strings as plain integers.
 *
 * @return a {@link Guess} whose type is a {@link StringTypeNumber}, or {@code null} when
 *         any string is not a parseable {@code int}, when a string has a leading zero
 *         that would be lost by parsing, or when the covered value range is larger than
 *         the number of distinct strings.
 */
@Override
public Guess createGuess() {
    // check if the remaining strings are all numbers
    try {
        // The range is seeded with [0, 0] and widened by every parsed value.
        Range<Integer> range = new IntegerRange(0, 0);
        final IntegerParser numberParser = new IntegerParser(config);
        for (Entry<String, Integer> e : p.getStrings().entrySet()) {
            final String text = e.getKey();
            // Throws NumberFormatException for non-numeric input; handled by the catch below.
            final int intValue = Integer.parseInt(text);
            // check that there are no leading zeroes that we would destroy
            if (text.startsWith("0") && !text.equals("0")) {
                return null;
            }
            range = range.span(new IntegerRange(intValue, intValue));
            numberParser.addLine((long) intValue);
        }
        numberParser.setLines(p.getLines());
        // do not use a number type if the range is much larger than the number of distinct values
        // e.g. if the column contains only 0 and 5M
        // The span is computed in long: with int arithmetic, max - min + 1 wraps to a
        // negative number for extreme values (e.g. max == Integer.MAX_VALUE), which would
        // incorrectly let the check below pass.
        final long span = 1L + range.getMax() - range.getMin();
        if (span > p.getStrings().size()) {
            return null;
        }
        final IntegerStore decision = numberParser.findBestType();
        // Effectively-final copy so the anonymous class below can capture the range.
        final Range<Integer> finalRange = range;
        return new Guess(null, decision.estimateMemoryConsumptionBytes(), 0) {

            @Override
            public StringStore getType() {
                // Build the id -> original string lookup; the first string seen for an id wins.
                Int2ObjectMap<String> inverse = new Int2ObjectOpenHashMap<>(p.getStrings().size());
                p.getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key));
                return new StringTypeNumber(finalRange, decision, inverse);
            }
        };
    } catch (NumberFormatException e) {
        // At least one string is not an int, so a number-backed type is not applicable.
        return null;
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntegerRange(com.bakdata.conquery.models.common.Range.IntegerRange) StringTypeNumber(com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Example 2 with IntegerParser

use of com.bakdata.conquery.models.preproc.parser.specific.IntegerParser in project conquery by bakdata.

From the class ImportJob, method applyDictionaryMappings.

/**
 * Rewrites the index column of every mapped string column to its new dictionary positions.
 *
 * <p>For each entry of {@code mappings}, the store registered under the same column name in
 * {@code values} is treated as a {@link StringStore}. A fresh {@link IntegerStore} is chosen
 * from the store's line count and the minimum/maximum of the mapping's target ids, the
 * mapping is applied into it, and it is installed as the store's index store. One unit of
 * progress is reported per column.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter progress = getProgressReporter().subJob(mappings.size());

    mappings.forEach((column, dictionaryMapping) -> {
        final StringStore target = (StringStore) values.get(column);
        log.debug("Remapping Column[{}] = {} with {}", column, target, dictionaryMapping);

        // The remapped ids may need a different width, so the index store type is chosen anew.
        final IntegerParser parser = new IntegerParser(config);
        final IntSummaryStatistics idStats = dictionaryMapping.target().intStream().summaryStatistics();
        parser.setLines(target.getLines());
        parser.setMinValue(idStats.getMin());
        parser.setMaxValue(idStats.getMax());

        final IntegerStore remappedIndex = parser.findBestType();
        log.trace("Decided for {}", remappedIndex);

        dictionaryMapping.applyToStore(target, remappedIndex);
        target.setIndexStore(remappedIndex);
        progress.report(1);
    });
}
Also used : IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) ProgressReporter(com.bakdata.conquery.util.progressreporter.ProgressReporter) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) DictionaryMapping(com.bakdata.conquery.models.dictionary.DictionaryMapping) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Aggregations

IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore): 2, IntegerParser (com.bakdata.conquery.models.preproc.parser.specific.IntegerParser): 2, IntegerRange (com.bakdata.conquery.models.common.Range.IntegerRange): 1, DictionaryMapping (com.bakdata.conquery.models.dictionary.DictionaryMapping): 1, StringStore (com.bakdata.conquery.models.events.stores.root.StringStore): 1, StringTypeNumber (com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber): 1, ProgressReporter (com.bakdata.conquery.util.progressreporter.ProgressReporter): 1, Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap): 1