Search in sources :

Example 6 with IdMapper

use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldHandleLargeAmountsOfDuplicateNodeIds.

/**
 * Puts every id in {@code [0, high)} into the mapper twice and expects
 * {@code collectDuplicateNode} to be invoked {@code high} times during preparation.
 */
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new LongEncoder(), Radix.LONG, NO_MONITOR);
    int high = 10;
    int pairsPerRun = high / 2;
    // Two identical runs; each run alternates between an id taken from the top
    // of the range and one taken from the bottom, so every id ends up added twice.
    List<Object> ids = new ArrayList<>();
    for (int run = 0; run < 2; run++) {
        long fromBottom = 0;
        while (fromBottom < pairsPerRun) {
            long fromTop = high - (fromBottom + 1);
            ids.add(fromTop);
            ids.add(fromBottom);
            fromBottom++;
        }
    }
    // Feed the ids to the IdMapper, assigning consecutive node ids starting at 0
    long nextNodeId = 0;
    for (Object id : ids) {
        mapper.put(id, nextNodeId, GLOBAL);
        nextNodeId++;
    }
    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(SimpleInputIteratorWrapper.wrap("source", ids), collector, NONE);
    // THEN
    verify(collector, times(high))
            .collectDuplicateNode(any(Object.class), anyLong(), anyString(), anyString(), anyString());
}
Also used : ArrayList(java.util.ArrayList) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 7 with IdMapper

use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldReportCollisionsForSameInputId.

/**
 * Puts the input id "10" twice (first and third entry of "source") and expects exactly
 * one duplicate-node report for it, with no other interaction on the collector.
 */
@Test
public void shouldReportCollisionsForSameInputId() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        // Node ids are handed out in iteration order: 0, 1, 2
        int nodeId = 0;
        while (iterator.hasNext()) {
            mapper.put(iterator.next(), nodeId, GLOBAL);
            nodeId++;
        }
    }
    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);
    // THEN
    verify(collector, times(1))
            .collectDuplicateNode("10", 2, GLOBAL.name(), "source:1", "source:3");
    verifyNoMoreInteractions(collector);
}
Also used : Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 8 with IdMapper

use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.

the class ParallelBatchImporter method doImport.

/**
 * Imports the given {@code input} by executing the import stages in order: nodes (optionally followed
 * by id-mapper preparation and duplicate-node deletion), dense-node calculation, relationship import,
 * relationship-group defragmentation and finally the node and relationship count stages.
 *
 * @param input supplies the nodes, relationships, id mapper, id generator and bad-entry collector.
 * @throws IOException if anything fails; the failure is logged and rethrown via
 *                     {@code Exceptions.launderedException(IOException.class, t)}.
 */
@Override
public void doImport(Input input) throws IOException {
    log.info("Import starting");
    // Things that we need to close later. The reason they're not in the try-with-resource statement
    // is that we need to close, and set to null, at specific points preferably. So use good ol' finally block.
    NodeRelationshipCache nodeRelationshipCache = null;
    NodeLabelsCache nodeLabelsCache = null;
    // Declared out here (rather than inside the try) so that the finally block can release it
    // when a stage fails before the explicit close further down is reached.
    IdMapper idMapper = null;
    long startTime = currentTimeMillis();
    CountingStoreUpdateMonitor storeUpdateMonitor = new CountingStoreUpdateMonitor();
    try (BatchingNeoStores neoStore = getBatchingNeoStores();
        CountsAccessor.Updater countsUpdater = neoStore.getCountsStore().reset(neoStore.getLastCommittedTransactionId());
        InputCache inputCache = new InputCache(fileSystem, storeDir, recordFormats, config)) {
        Collector badCollector = input.badCollector();
        // Some temporary caches and indexes in the import
        IoMonitor writeMonitor = new IoMonitor(neoStore.getIoTracer());
        idMapper = input.idMapper();
        IdGenerator idGenerator = input.idGenerator();
        nodeRelationshipCache = new NodeRelationshipCache(AUTO, config.denseNodeThreshold());
        StatsProvider memoryUsageStats = new MemoryUsageStatsProvider(nodeRelationshipCache, idMapper);
        InputIterable<InputNode> nodes = input.nodes();
        InputIterable<InputRelationship> relationships = input.relationships();
        InputIterable<InputNode> cachedNodes = cachedForSure(nodes, inputCache.nodes(MAIN, true));
        InputIterable<InputRelationship> cachedRelationships = cachedForSure(relationships, inputCache.relationships(MAIN, true));
        RelationshipStore relationshipStore = neoStore.getRelationshipStore();
        // Stage 1 -- nodes, properties, labels
        NodeStage nodeStage = new NodeStage(config, writeMonitor, nodes, idMapper, idGenerator, neoStore, inputCache, neoStore.getLabelScanStore(), storeUpdateMonitor, nodeRelationshipCache, memoryUsageStats);
        executeStage(nodeStage);
        if (idMapper.needsPreparation()) {
            executeStage(new IdMapperPreparationStage(config, idMapper, cachedNodes, badCollector, memoryUsageStats));
            // Duplicates detected while preparing the id mapper are removed again from the store
            PrimitiveLongIterator duplicateNodeIds = badCollector.leftOverDuplicateNodesIds();
            if (duplicateNodeIds.hasNext()) {
                executeStage(new DeleteDuplicateNodesStage(config, duplicateNodeIds, neoStore));
            }
        }
        // Stage 2 -- calculate dense node threshold
        CalculateDenseNodesStage calculateDenseNodesStage = new CalculateDenseNodesStage(withBatchSize(config, config.batchSize() * 10), relationships, nodeRelationshipCache, idMapper, badCollector, inputCache, neoStore);
        executeStage(calculateDenseNodesStage);
        // NOTE(review): stages 3-5 are presumably executed inside importRelationships -- confirm
        importRelationships(nodeRelationshipCache, storeUpdateMonitor, neoStore, writeMonitor, idMapper, cachedRelationships, inputCache, calculateDenseNodesStage.getRelationshipTypes(Long.MAX_VALUE), calculateDenseNodesStage.getRelationshipTypes(100));
        // Release this potentially really big piece of cached data
        // (the numbers needed afterwards are read out before the caches are closed)
        long peakMemoryUsage = totalMemoryUsageOf(idMapper, nodeRelationshipCache);
        long highNodeId = nodeRelationshipCache.getHighNodeId();
        idMapper.close();
        // Nulled so that the finally block doesn't close it a second time
        idMapper = null;
        nodeRelationshipCache.close();
        nodeRelationshipCache = null;
        new RelationshipGroupDefragmenter(config, executionMonitor).run(max(max(peakMemoryUsage, highNodeId * 4), mebiBytes(1)), neoStore, highNodeId);
        // Stage 6 -- count nodes per label and labels per node
        nodeLabelsCache = new NodeLabelsCache(AUTO, neoStore.getLabelRepository().getHighId());
        memoryUsageStats = new MemoryUsageStatsProvider(nodeLabelsCache);
        executeStage(new NodeCountsStage(config, nodeLabelsCache, neoStore.getNodeStore(), neoStore.getLabelRepository().getHighId(), countsUpdater, memoryUsageStats));
        // Stage 7 -- count label-[type]->label
        executeStage(new RelationshipCountsStage(config, nodeLabelsCache, relationshipStore, neoStore.getLabelRepository().getHighId(), neoStore.getRelationshipTypeRepository().getHighId(), countsUpdater, AUTO));
        // We're done, do some final logging about it
        long totalTimeMillis = currentTimeMillis() - startTime;
        executionMonitor.done(totalTimeMillis, format("%n") + storeUpdateMonitor.toString() + format("%n") + "Peak memory usage: " + bytes(peakMemoryUsage));
        log.info("Import completed, took " + Format.duration(totalTimeMillis) + ". " + storeUpdateMonitor);
    } catch (Throwable t) {
        log.error("Error during import", t);
        throw Exceptions.launderedException(IOException.class, t);
    } finally {
        // Anything still non-null here was not released on the regular path above
        if (nodeRelationshipCache != null) {
            nodeRelationshipCache.close();
        }
        if (nodeLabelsCache != null) {
            nodeLabelsCache.close();
        }
        // Only reached with a non-null idMapper when the import failed before the explicit close
        if (idMapper != null) {
            idMapper.close();
        }
    }
}
Also used : NodeLabelsCache(org.neo4j.unsafe.impl.batchimport.cache.NodeLabelsCache) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) CountsAccessor(org.neo4j.kernel.impl.api.CountsAccessor) PrimitiveLongIterator(org.neo4j.collection.primitive.PrimitiveLongIterator) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) InputCache(org.neo4j.unsafe.impl.batchimport.input.InputCache) NodeRelationshipCache(org.neo4j.unsafe.impl.batchimport.cache.NodeRelationshipCache) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) BatchingNeoStores(org.neo4j.unsafe.impl.batchimport.store.BatchingNeoStores) IdGenerator(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator) IOException(java.io.IOException) StatsProvider(org.neo4j.unsafe.impl.batchimport.stats.StatsProvider) RelationshipStore(org.neo4j.kernel.impl.store.RelationshipStore) IoMonitor(org.neo4j.unsafe.impl.batchimport.store.io.IoMonitor)

Example 9 with IdMapper

use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldReportyProgressForSortAndDetect.

/**
 * Prepares a mapper that has had nothing put into it and expects the progress listener
 * to see three started/done pairs, while a lookup returns {@code ID_NOT_FOUND}.
 */
@Test
public void shouldReportyProgressForSortAndDetect() throws Exception {
    // GIVEN
    ProgressListener progress = mock(ProgressListener.class);
    Collector collector = mock(Collector.class);
    IdMapper idMapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    idMapper.prepare(null, collector, progress);
    // WHEN
    long foundId = idMapper.get("123", GLOBAL);
    // THEN
    assertEquals(ID_NOT_FOUND, foundId);
    int expectedPhases = 3;
    verify(progress, times(expectedPhases)).started(anyString());
    verify(progress, times(expectedPhases)).done();
}
Also used : ProgressListener(org.neo4j.helpers.progress.ProgressListener) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 10 with IdMapper

use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldCopeWithCollisionsBasedOnDifferentInputIds.

/**
 * Uses a mocked encoder that returns the same encoded value for every input id, so the two
 * different input ids "10" and "9" collide, and expects both to still be looked up correctly
 * without anything being reported to the collector.
 */
@Test
public void shouldCopeWithCollisionsBasedOnDifferentInputIds() throws Exception {
    // GIVEN
    Monitor monitor = mock(Monitor.class);
    Encoder encoder = mock(Encoder.class);
    when(encoder.encode(any())).thenReturn(12345L);
    IdMapper mapper = mapper(encoder, Radix.STRING, monitor);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        // Node ids are handed out in iteration order: "10" -> 0, "9" -> 1
        int nodeId = 0;
        while (iterator.hasNext()) {
            mapper.put(iterator.next(), nodeId, GLOBAL);
            nodeId++;
        }
    }
    // WHEN
    ProgressListener progress = mock(ProgressListener.class);
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, progress);
    // THEN
    verifyNoMoreInteractions(collector);
    verify(monitor).numberOfCollisions(2);
    assertEquals(0L, mapper.get("10", GLOBAL));
    assertEquals(1L, mapper.get("9", GLOBAL));
    // 7 times since SPLIT+SORT+DETECT+RESOLVE+SPLIT+SORT,DEDUPLICATE
    int expectedPhases = 7;
    verify(progress, times(expectedPhases)).started(anyString());
    verify(progress, times(expectedPhases)).done();
}
Also used : Monitor(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor) ProgressListener(org.neo4j.helpers.progress.ProgressListener) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Aggregations

IdMapper (org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper)17 Test (org.junit.Test)16 Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)15 Collectors.badCollector (org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector)14 Group (org.neo4j.unsafe.impl.batchimport.input.Group)6 ArrayList (java.util.ArrayList)3 ProgressListener (org.neo4j.helpers.progress.ProgressListener)3 Monitor (org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor)3 Matchers.containsString (org.hamcrest.Matchers.containsString)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrimitiveLongIterator (org.neo4j.collection.primitive.PrimitiveLongIterator)2 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 Iterator (java.util.Iterator)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Matchers.anyLong (org.mockito.Matchers.anyLong)1 ResourceIterator (org.neo4j.graphdb.ResourceIterator)1 PrefetchingIterator (org.neo4j.helpers.collection.PrefetchingIterator)1 CountsAccessor (org.neo4j.kernel.impl.api.CountsAccessor)1