Use of com.bakdata.conquery.models.preproc.parser.specific.IntegerParser in project conquery by bakdata.
From the class NumberTypeGuesser, method createGuess.
/**
 * Tries to represent the collected strings as plain integers.
 *
 * <p>The guess is rejected (this method returns {@code null}) when
 * <ul>
 *   <li>any string cannot be parsed by {@link Integer#parseInt(String)},</li>
 *   <li>any string other than {@code "0"} starts with {@code '0'} (the leading zeroes would be lost), or</li>
 *   <li>the covered value range, which always includes 0, is larger than the number of distinct strings.</li>
 * </ul>
 *
 * @return a {@code Guess} sized by the memory estimate of the chosen {@code IntegerStore}, whose
 *         {@code getType()} builds a {@code StringTypeNumber} on demand, or {@code null} if the
 *         strings are not suitable for a number type
 */
@Override
public Guess createGuess() {
    // check if the remaining strings are all numbers
    try {
        // The range starts out as [0, 0] and is widened by every parsed value.
        Range<Integer> range = new IntegerRange(0, 0);
        final IntegerParser numberParser = new IntegerParser(config);

        for (Entry<String, Integer> e : p.getStrings().entrySet()) {
            final String text = e.getKey();
            // Throws NumberFormatException for non-numeric strings; handled below.
            final int intValue = Integer.parseInt(text);

            // check that there are no leading zeroes that we would destroy
            if (text.startsWith("0") && !text.equals("0")) {
                return null;
            }

            range = range.span(new IntegerRange(intValue, intValue));
            numberParser.addLine((long) intValue);
        }
        numberParser.setLines(p.getLines());

        // do not use a number type if the range is much larger than the number of distinct values
        // e.g. if the column contains only 0 and 5M
        // The span is computed in long arithmetic: with int arithmetic a range such as
        // [0, Integer.MAX_VALUE] wraps around to a negative span and slips past this check.
        final long min = range.getMin();
        final long max = range.getMax();
        final long span = max - min + 1;
        if (span > p.getStrings().size()) {
            return null;
        }

        final IntegerStore decision = numberParser.findBestType();
        // The anonymous class below may only capture effectively final locals.
        final Range<Integer> finalRange = range;

        return new Guess(null, decision.estimateMemoryConsumptionBytes(), 0) {
            @Override
            public StringStore getType() {
                // Reverse lookup: the Integer associated with each string -> that string.
                final Int2ObjectMap<String> inverse = new Int2ObjectOpenHashMap<>(p.getStrings().size());
                p.getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key));
                return new StringTypeNumber(finalRange, decision, inverse);
            }
        };
    } catch (NumberFormatException e) {
        // At least one string is not an int, so a number type is not applicable.
        return null;
    }
}
Use of com.bakdata.conquery.models.preproc.parser.specific.IntegerParser in project conquery by bakdata.
From the class ImportJob, method applyDictionaryMappings.
/**
 * Applies the new positions of incoming shared dictionaries to the affected columns.
 *
 * <p>For every mapped column a fresh index store is chosen from the line count of the existing
 * store and the smallest and largest target value of the mapping. The mapping is then applied
 * into that store, which finally replaces the index store of the column.
 *
 * @param mappings dictionary mappings keyed by column name
 * @param values   column stores keyed by column name; the store of every mapped column is cast
 *                 to {@code StringStore}
 */
private void applyDictionaryMappings(Map<String, DictionaryMapping> mappings, Map<String, ColumnStore> values) {
    final ProgressReporter progress = getProgressReporter().subJob(mappings.size());

    for (Map.Entry<String, DictionaryMapping> mapped : mappings.entrySet()) {
        final String column = mapped.getKey();
        final DictionaryMapping remapping = mapped.getValue();
        final StringStore store = (StringStore) values.get(column);

        log.debug("Remapping Column[{}] = {} with {}", column, store, remapping);

        // The index column is about to be remapped and might change in size,
        // so its type has to be determined anew.
        final IntegerParser sizer = new IntegerParser(config);
        final IntSummaryStatistics targetStats = remapping.target().intStream().summaryStatistics();

        sizer.setLines(store.getLines());
        sizer.setMinValue(targetStats.getMin());
        sizer.setMaxValue(targetStats.getMax());

        final IntegerStore remappedIndex = sizer.findBestType();
        log.trace("Decided for {}", remappedIndex);

        remapping.applyToStore(store, remappedIndex);
        store.setIndexStore(remappedIndex);

        // One unit of progress per processed column.
        progress.report(1);
    }
}
Aggregations