Search in sources :

Example 21 with LongArrayList

use of it.unimi.dsi.fastutil.longs.LongArrayList in project caffeine by ben-manes.

the class PolicyActor method onReceive.

/**
 * Dispatches an incoming actor message.
 *
 * <ul>
 *   <li>A {@code LongArrayList} is passed to {@code process}.</li>
 *   <li>{@code Message.FINISH} finishes the policy, sends the policy's stats back to the
 *       sender, and stops this actor.</li>
 *   <li>{@code Message.ERROR} stops this actor without reporting stats.</li>
 *   <li>Any other message is logged as an error.</li>
 * </ul>
 *
 * @param msg the received message
 */
@Override
public void onReceive(Object msg) {
    if (msg instanceof LongArrayList) {
        process((LongArrayList) msg);
        return;
    }
    if (msg == Message.FINISH) {
        policy.finished();
        getSender().tell(policy.stats(), ActorRef.noSender());
        getContext().stop(getSelf());
        return;
    }
    if (msg == Message.ERROR) {
        getContext().stop(getSelf());
        return;
    }
    // Nothing matched: report the unexpected message.
    context().system().log().error("Invalid message: " + msg);
}
Also used : LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList)

Example 22 with LongArrayList

use of it.unimi.dsi.fastutil.longs.LongArrayList in project presto by prestodb.

the class ParquetHiveRecordCursor method createParquetRecordReader.

/**
 * Creates and initializes a Parquet record reader for the part of {@code path} described by
 * {@code start} and {@code length}.
 *
 * <p>A row group is selected when the first data page offset of its first column lies in
 * {@code [start, start + length)}. When {@code predicatePushdownEnabled} is set, a selected row
 * group must additionally satisfy the predicate built from {@code effectivePredicate}. The
 * starting positions of the selected row groups are handed to the {@code ParquetInputSplit}.
 *
 * <p>The temporary data source opened here is closed before returning, whether or not the
 * reader was created; an {@code IOException} from that close is ignored.
 *
 * @throws PrestoException rethrown unchanged if raised while opening; otherwise any failure is
 *     wrapped with {@code HIVE_MISSING_DATA} (when the exception's simple class name is
 *     {@code BlockMissingException}) or {@code HIVE_CANNOT_OPEN_SPLIT}
 */
private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
        List<BlockMetaData> rowGroups = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);

        // Only regular columns that resolve to a Parquet type are part of the requested schema.
        List<parquet.schema.Type> fields = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR)
                .map(column -> getParquetType(column, fileSchema, useParquetColumnNames))
                .filter(Objects::nonNull)
                .collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);

        long end = start + length;
        LongArrayList offsets = new LongArrayList(rowGroups.size());
        for (BlockMetaData rowGroup : rowGroups) {
            long firstDataPage = rowGroup.getColumns().get(0).getFirstDataPageOffset();
            boolean insideSplit = firstDataPage >= start && firstDataPage < end;
            if (!insideSplit) {
                continue;
            }
            // Without pushdown every row group inside the split is read.
            boolean selected = true;
            if (predicatePushdownEnabled) {
                ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
                selected = predicateMatches(parquetPredicate, rowGroup, dataSource, requestedSchema, effectivePredicate);
            }
            if (selected) {
                offsets.add(rowGroup.getStartingPos());
            }
        }

        ParquetInputSplit split = new ParquetInputSplit(path, start, end, length, null, offsets.toLongArray());
        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        return hdfsEnvironment.doAs(sessionUser, () -> {
            ParquetRecordReader<FakeParquetRecord> reader = new PrestoParquetRecordReader(readSupport);
            reader.initialize(split, taskContext);
            return reader;
        });
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, PrestoException.class);
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag before propagating.
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        // Matched by simple name rather than by type.
        boolean missingBlock = e.getClass().getSimpleName().equals("BlockMissingException");
        if (missingBlock) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    } finally {
        if (dataSource != null) {
            try {
                dataSource.close();
            } catch (IOException ignored) {
                // Best-effort close of the temporary data source.
            }
        }
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) Block(com.facebook.presto.spi.block.Block) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CURSOR_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Slices.wrappedBuffer(io.airlift.slice.Slices.wrappedBuffer) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DecimalType(com.facebook.presto.spi.type.DecimalType) DecimalMetadata(parquet.schema.DecimalMetadata) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) PrimitiveType(parquet.schema.PrimitiveType) MAP_KEY_VALUE(parquet.schema.OriginalType.MAP_KEY_VALUE) Decimals(com.facebook.presto.spi.type.Decimals) ReadSupport(parquet.hadoop.api.ReadSupport) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) Math.min(java.lang.Math.min) Chars.trimSpacesAndTruncateToLength(com.facebook.presto.spi.type.Chars.trimSpacesAndTruncateToLength) Binary(parquet.io.api.Binary) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) ROW(com.facebook.presto.spi.type.StandardTypes.ROW) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) NO_FILTER(parquet.format.converter.ParquetMetadataConverter.NO_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) Varchars.truncateToLength(com.facebook.presto.spi.type.Varchars.truncateToLength) 
HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RecordMaterializer(parquet.io.api.RecordMaterializer) Converter(parquet.io.api.Converter) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) GroupConverter(parquet.io.api.GroupConverter) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Slice(io.airlift.slice.Slice) ParquetFileReader(parquet.hadoop.ParquetFileReader) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ParquetRecordReader(parquet.hadoop.ParquetRecordReader) PrestoException(com.facebook.presto.spi.PrestoException) PrimitiveConverter(parquet.io.api.PrimitiveConverter) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) MAP(com.facebook.presto.spi.type.StandardTypes.MAP) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) DecimalUtils(com.facebook.presto.hive.util.DecimalUtils) ARRAY(com.facebook.presto.spi.type.StandardTypes.ARRAY) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.closeWithSuppression(com.facebook.presto.hive.HiveUtil.closeWithSuppression) Type(com.facebook.presto.spi.type.Type) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) DECIMAL(parquet.schema.OriginalType.DECIMAL) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus) Dictionary(parquet.column.Dictionary) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) 
ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) ContextUtil(parquet.hadoop.util.ContextUtil) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) Collectors.toList(java.util.stream.Collectors.toList) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) DecimalType(com.facebook.presto.spi.type.DecimalType) PrimitiveType(parquet.schema.PrimitiveType) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Type(com.facebook.presto.spi.type.Type) 
MessageType(parquet.schema.MessageType) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit)

Example 23 with LongArrayList

use of it.unimi.dsi.fastutil.longs.LongArrayList in project cdap by caskdata.

the class StreamDataFileIndex method loadIndex.

/**
 * Reads a stream index from {@code input}.
 *
 * <p>The stream must begin with {@code INDEX_MAGIC_HEADER}, followed by an encoded properties
 * map (decoded and discarded), followed by fixed-size entries of two longs each: a timestamp
 * and a position. Reading stops at the first read that does not fill a whole entry, so an
 * incomplete trailing entry is not included in the result.
 *
 * @param input stream positioned at the start of the index
 * @return an immutable entry whose key is the list of timestamps and whose value is the list
 *     of positions, in the order they were read
 * @throws IOException if the magic header does not match or reading fails
 */
private Map.Entry<LongList, LongList> loadIndex(InputStream input) throws IOException {
    byte[] header = new byte[INDEX_MAGIC_HEADER.length];
    ByteStreams.readFully(input, header);
    boolean headerMatches = Arrays.equals(header, INDEX_MAGIC_HEADER);
    if (!headerMatches) {
        throw new IOException("Unsupported index file format. Expected magic bytes as 'I' '1'");
    }

    // The properties map has to be consumed to reach the entries; its content is unused.
    StreamUtils.decodeMap(new BinaryDecoder(input));

    LongList timestamps = new LongArrayList(1000);
    LongList positions = new LongArrayList(1000);

    // Each entry is a (timestamp, position) pair of longs.
    byte[] entry = new byte[Longs.BYTES * 2];
    while (true) {
        int bytesRead = ByteStreams.read(input, entry, 0, entry.length);
        if (bytesRead != entry.length) {
            break;
        }
        timestamps.add(Bytes.toLong(entry, 0));
        positions.add(Bytes.toLong(entry, Longs.BYTES));
    }
    return Maps.immutableEntry(timestamps, positions);
}
Also used : LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) IOException(java.io.IOException) LongList(it.unimi.dsi.fastutil.longs.LongList) BinaryDecoder(co.cask.cdap.common.io.BinaryDecoder)

Example 24 with LongArrayList

use of it.unimi.dsi.fastutil.longs.LongArrayList in project mkgmap by openstreetmap.

the class HousenumberGenerator method useAddrPlaceTag.

/**
 * Evaluates houses that carry a place value and decides, per city and place name, how the
 * roads of those houses are treated.
 *
 * <p>Step 1 groups all {@code HousenumberMatch} elements of {@code houseElems} that have a
 * road, a place and a housenumber road by city and then by place name.
 *
 * <p>Step 2 walks the place names of each city in sorted order, collects the distinct
 * housenumber roads and counts duplicate numbers/signs and street-name matches. Based on
 * these counts it
 * <ul>
 *   <li>calls {@code setRemoveGaps(true)} on every road of the place when fewer than 25% of
 *       the houses have a duplicated sign, and</li>
 *   <li>registers the place name on every road of the place when at least one house has no
 *       street value.</li>
 * </ul>
 *
 * @param hnrList not read by this method
 */
private void useAddrPlaceTag(List<HousenumberRoad> hnrList) {
    // Step 1: city -> (place name -> houses)
    HashMap<CityInfo, MultiHashMap<String, HousenumberMatch>> cityPlaceHouseMap = new LinkedHashMap<>();
    for (int idx = 0; idx < houseElems.size(); idx++) {
        HousenumberElem elem = houseElems.get(idx);
        if (elem.getRoad() == null || elem.getPlace() == null || !(elem instanceof HousenumberMatch))
            continue;
        HousenumberMatch match = (HousenumberMatch) elem;
        if (match.getHousenumberRoad() == null)
            continue;
        MultiHashMap<String, HousenumberMatch> housesOfCity = cityPlaceHouseMap.get(match.getCityInfo());
        if (housesOfCity == null) {
            housesOfCity = new MultiHashMap<>();
            cityPlaceHouseMap.put(match.getCityInfo(), housesOfCity);
        }
        housesOfCity.add(match.getPlace(), match);
    }
    log.info("analysing", cityPlaceHouseMap.size(), "cities with addr:place=* houses");

    // Step 2: evaluate each place of each city
    for (Entry<CityInfo, MultiHashMap<String, HousenumberMatch>> cityEntry : cityPlaceHouseMap.entrySet()) {
        CityInfo cityInfo = cityEntry.getKey();
        MultiHashMap<String, HousenumberMatch> housesByPlace = cityEntry.getValue();
        List<String> placeNames = new ArrayList<>(housesByPlace.keySet());
        Collections.sort(placeNames);
        for (String placeName : placeNames) {
            List<HousenumberMatch> placeHouses = housesByPlace.get(placeName);
            HashSet<HousenumberRoad> roads = new LinkedHashSet<>();
            Int2IntOpenHashMap usedNumbers = new Int2IntOpenHashMap();
            HashMap<String, Integer> usedSigns = new HashMap<>();
            int dupSigns = 0;
            int dupNumbers = 0;
            int housesWithStreet = 0;
            int housesWithMatchingStreet = 0;
            int roadsWithNames = 0;
            int unnamedCloseRoads = 0;
            for (HousenumberMatch house : placeHouses) {
                String houseStreet = house.getStreet();
                String roadStreet = house.getRoad().getStreet();
                if (houseStreet == null) {
                    if (roadStreet == null)
                        ++unnamedCloseRoads;
                } else {
                    ++housesWithStreet;
                    if (houseStreet.equalsIgnoreCase(roadStreet))
                        ++housesWithMatchingStreet;
                }
                // count a named road only the first time it is seen for this place
                if (roads.add(house.getHousenumberRoad()) && roadStreet != null)
                    ++roadsWithNames;
                // put() returns the previous count, 0 means the number was not used before
                int oldCount = usedNumbers.put(house.getHousenumber(), 1);
                if (oldCount != 0) {
                    usedNumbers.put(house.getHousenumber(), oldCount + 1);
                    ++dupNumbers;
                }
                String sign = house.getSign();
                Integer oldSignCount = usedSigns.put(sign, 1);
                if (oldSignCount != null) {
                    usedSigns.put(sign, oldSignCount + 1);
                    ++dupSigns;
                }
            }
            if (log.isDebugEnabled()) {
                log.debug("place", placeName, "in city", cityInfo, ":", "houses:", placeHouses.size(), ",duplicate numbers/signs:", dupNumbers + "/" + dupSigns, ",roads (named/unnamed):", roads.size(), "(" + roadsWithNames + "/" + (roads.size() - roadsWithNames) + ")", ",houses without addr:street:", placeHouses.size() - housesWithStreet, ",street = name of closest road:", housesWithMatchingStreet, ",houses without addr:street near named road:", unnamedCloseRoads);
            }
            if ((float) dupSigns / placeHouses.size() < 0.25) {
                if (log.isDebugEnabled())
                    log.debug("will not use gaps in intervals for roads in", placeName);
                for (HousenumberRoad hnr : roads) {
                    hnr.setRemoveGaps(true);
                }
            }
            if (placeHouses.size() <= housesWithStreet) {
                // every house has a street value
                if (log.isDebugEnabled())
                    log.debug("will ignore addr:place for address search in", placeName, "in city", cityInfo);
                continue;
            }
            // XXX: threshold value?
            LongArrayList ids = new LongArrayList();
            for (HousenumberRoad hnr : roads) {
                ids.add(hnr.getRoad().getRoadDef().getId());
                hnr.addPlaceName(placeName);
            }
            if (log.isDebugEnabled())
                log.debug("detected", placeName, "as potential address name for roads", ids);
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) CityInfo(uk.me.parabola.mkgmap.general.CityInfo) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Long2ObjectOpenHashMap(it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) MultiHashMap(uk.me.parabola.util.MultiHashMap) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) ArrayList(java.util.ArrayList) MultiHashMap(uk.me.parabola.util.MultiHashMap) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 25 with LongArrayList

use of it.unimi.dsi.fastutil.longs.LongArrayList in project gridss by PapenfussLab.

the class KmerEncodingHelperTest method partialSequenceBasesDifferent_should_calc_forward_anchored.

/**
 * Checks {@code KmerEncodingHelper.partialSequenceBasesDifferent} for k = 1..3 against a fixed
 * reference sequence. Every case is asserted twice, once with the final boolean argument
 * {@code true} and once with {@code false}, and both calls must give the same count.
 */
@Test
public void partialSequenceBasesDifferent_should_calc_forward_anchored() {
    final String reference = "ACTGGTTAACACGTCAGGTACGTCG";
    // Parallel arrays: query sequence, offset argument, expected number of differing bases.
    final String[] queries = {
        // identical to the reference
        "ACTGGTTAACACGTCAGGTACGTCG",
        // one substitution near the end
        "ACTGGTTAACACGTCAGGTACTTCG",
        // two adjacent substitutions near the end
        "ACTGGTTAACACGTCAGGTAGTTCG",
        // one substitution at the start plus two near the end
        "ATTGGTTAACACGTCAGGTAGTTCG",
        // reference without its first and last base
        "CTGGTTAACACGTCAGGTACGTC",
        // trimmed reference with one substitution
        "CTGTTTAACACGTCAGGTACGTC",
        // same case repeated
        "CTGTTTAACACGTCAGGTACGTC",
    };
    final int[] offsets = { 0, 0, 0, 0, 1, 1, 1 };
    final int[] expected = { 0, 1, 2, 3, 0, 1, 1 };
    for (int k = 1; k < 4; k++) {
        LongArrayList ref = KPN(k, reference, 1, 1, true).pathKmers();
        for (int c = 0; c < queries.length; c++) {
            assertEquals(expected[c], KmerEncodingHelper.partialSequenceBasesDifferent(k, ref, KPN(k, queries[c], 1, 1, true).pathKmers(), offsets[c], true));
            assertEquals(expected[c], KmerEncodingHelper.partialSequenceBasesDifferent(k, ref, KPN(k, queries[c], 1, 1, true).pathKmers(), offsets[c], false));
        }
    }
}
Also used : LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Test(org.junit.Test)

Aggregations

LongArrayList (it.unimi.dsi.fastutil.longs.LongArrayList)37 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)5 ArrayList (java.util.ArrayList)4 Test (org.junit.Test)4 IndexPartGetLongResult (com.tencent.angel.ml.matrix.psf.get.indexed.IndexPartGetLongResult)3 ServerLongAnyRow (com.tencent.angel.ps.storage.vector.ServerLongAnyRow)3 Long2ObjectMap (it.unimi.dsi.fastutil.longs.Long2ObjectMap)3 LongList (it.unimi.dsi.fastutil.longs.LongList)3 LongOpenHashSet (it.unimi.dsi.fastutil.longs.LongOpenHashSet)3 IOException (java.io.IOException)3 List (java.util.List)3 Block (com.facebook.presto.spi.block.Block)2 Type (com.facebook.presto.spi.type.Type)2 Supplier (com.google.common.base.Supplier)2 LongFloatSparseVectorStorage (com.tencent.angel.ml.math2.storage.LongFloatSparseVectorStorage)2 LongFloatVector (com.tencent.angel.ml.math2.vector.LongFloatVector)2 FloatArrayList (it.unimi.dsi.fastutil.floats.FloatArrayList)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder)1