Search in sources :

Example 1 with IntegerRange

Use of com.bakdata.conquery.models.common.Range.IntegerRange in the project conquery by bakdata.

From the class NumberTypeGuesser, method createGuess.

/**
 * Tries to represent the remaining distinct strings of the column as integers.
 *
 * <p>A guess is only produced when every string
 * <ul>
 *   <li>parses as an {@code int}, and</li>
 *   <li>is exactly the canonical decimal text of that {@code int}, so that nothing
 *       (leading zeroes, an explicit {@code +} sign, {@code -0}, ...) is lost when the
 *       string is replaced by its numeric value, and</li>
 *   <li>the covered value range is not larger than the number of distinct strings.</li>
 * </ul>
 *
 * @return a guess that lazily builds a {@link StringTypeNumber}, or {@code null} when the
 *         strings cannot be represented as numbers under the rules above
 */
@Override
public Guess createGuess() {
    try {
        // NOTE(review): the range starts at [0, 0], so it always contains 0 even when no
        // value is 0; columns holding only large numbers are therefore rejected by the
        // span check below - confirm that this is intended.
        Range<Integer> range = new IntegerRange(0, 0);
        IntegerParser numberParser = new IntegerParser(config);

        for (Entry<String, Integer> e : p.getStrings().entrySet()) {
            final String raw = e.getKey();
            // Throws NumberFormatException for non-numeric strings (handled below).
            final int intValue = Integer.parseInt(raw);

            // Only accept strings that survive a round trip through int. parseInt also
            // accepts e.g. "007", "+5" or "-0", whose original text would be destroyed.
            if (!Integer.toString(intValue).equals(raw)) {
                return null;
            }

            range = range.span(new IntegerRange(intValue, intValue));
            numberParser.addLine((long) intValue);
        }
        numberParser.setLines(p.getLines());

        // Do not use a number type if the range is much larger than the number of distinct
        // values, e.g. if the column contains only 0 and 5M.
        // Computed as long: max - min + 1 overflows int for very wide ranges and would
        // otherwise wrap to a small/negative span that wrongly passes this check.
        final long span = (long) range.getMax() - range.getMin() + 1;
        if (span > p.getStrings().size()) {
            return null;
        }

        final IntegerStore decision = numberParser.findBestType();
        // Effectively-final copy so the anonymous class below can capture the range.
        final Range<Integer> finalRange = range;

        return new Guess(null, decision.estimateMemoryConsumptionBytes(), 0) {

            @Override
            public StringStore getType() {
                // Reverse lookup from id to its original string; the first string seen
                // for an id wins.
                Int2ObjectMap<String> inverse = new Int2ObjectOpenHashMap<>(p.getStrings().size());
                p.getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key));
                return new StringTypeNumber(finalRange, decision, inverse);
            }
        };
    } catch (NumberFormatException e) {
        // At least one string is not an int: this guesser does not apply.
        return null;
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) IntegerRange(com.bakdata.conquery.models.common.Range.IntegerRange) StringTypeNumber(com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber) IntegerStore(com.bakdata.conquery.models.events.stores.root.IntegerStore) IntegerParser(com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)

Aggregations

IntegerRange (com.bakdata.conquery.models.common.Range.IntegerRange)1 IntegerStore (com.bakdata.conquery.models.events.stores.root.IntegerStore)1 StringTypeNumber (com.bakdata.conquery.models.events.stores.specific.string.StringTypeNumber)1 IntegerParser (com.bakdata.conquery.models.preproc.parser.specific.IntegerParser)1 Int2ObjectOpenHashMap (it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap)1