use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.
the class EncodingIdMapperTest method shouldHandleLargeAmountsOfDuplicateNodeIds.
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new LongEncoder(), Radix.LONG, NO_MONITOR);
    long nodeId = 0;
    int high = 10;
    // a list of input ids
    List<Object> ids = new ArrayList<>();
    for (int run = 0; run < 2; run++) {
        for (long i = 0; i < high / 2; i++) {
            ids.add(high - (i + 1));
            ids.add(i);
        }
    }
    // fed to the IdMapper
    for (Object inputId : ids) {
        mapper.put(inputId, nodeId++, GLOBAL);
    }

    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(SimpleInputIteratorWrapper.wrap("source", ids), collector, NONE);

    // THEN
    verify(collector, times(high)).collectDuplicateNode(any(Object.class), anyLong(), anyString(), anyString(), anyString());
}
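For context, the lifecycle these tests exercise is put, then prepare, then get: each input id is registered against the node id it was imported as, prepare(..) sorts the encoded ids and reports duplicates to the Collector, and only afterwards can get(..) resolve lookups. A minimal sketch of that flow, reusing the mapper(..) factory, wrap(..), GLOBAL, NO_MONITOR and NONE from the tests in this class (test scaffolding, not standalone production code):

IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
// Register each input id against the node id it was imported as
mapper.put("a", 0, GLOBAL);
mapper.put("b", 1, GLOBAL);
// Sorting and duplicate/collision detection happen here; any duplicates
// would be reported to the Collector
mapper.prepare(wrap("source", Arrays.<Object>asList("a", "b")), mock(Collector.class), NONE);
// Only after prepare(..) can input ids be resolved back to node ids
assertEquals(0L, mapper.get("a", GLOBAL));
assertEquals(1L, mapper.get("b", GLOBAL));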
use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.
the class EncodingIdMapperTest method shouldReportCollisionsForSameInputId.
@Test
public void shouldReportCollisionsForSameInputId() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        for (int i = 0; iterator.hasNext(); i++) {
            mapper.put(iterator.next(), i, GLOBAL);
        }
    }

    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // THEN
    verify(collector, times(1)).collectDuplicateNode("10", 2, GLOBAL.name(), "source:1", "source:3");
    verifyNoMoreInteractions(collector);
}
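The expected call encodes everything about the duplicate: input id "10", the node id 2 it resolved to, the GLOBAL group, and the two source positions "source:1" and "source:3" where it was seen. When the exact values are not known up front, the report can be captured instead of verified; a small sketch using Mockito's ArgumentCaptor against the same collectDuplicateNode signature verified above:

ArgumentCaptor<Object> duplicateId = ArgumentCaptor.forClass(Object.class);
// Capture the reported input id while matching the remaining arguments loosely
verify(collector).collectDuplicateNode(duplicateId.capture(), anyLong(), anyString(), anyString(), anyString());
assertEquals("10", duplicateId.getValue());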
use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.
the class ParallelBatchImporter method doImport.
@Override
public void doImport(Input input) throws IOException {
    log.info("Import starting");
    // Things that we need to close later. The reason they're not in the try-with-resources statement
    // is that we want to close them, and set them to null, at specific points, so we use a good ol' finally block.
    NodeRelationshipCache nodeRelationshipCache = null;
    NodeLabelsCache nodeLabelsCache = null;
    long startTime = currentTimeMillis();
    CountingStoreUpdateMonitor storeUpdateMonitor = new CountingStoreUpdateMonitor();
    try (BatchingNeoStores neoStore = getBatchingNeoStores();
            CountsAccessor.Updater countsUpdater = neoStore.getCountsStore().reset(neoStore.getLastCommittedTransactionId());
            InputCache inputCache = new InputCache(fileSystem, storeDir, recordFormats, config)) {
        Collector badCollector = input.badCollector();
        // Some temporary caches and indexes in the import
        IoMonitor writeMonitor = new IoMonitor(neoStore.getIoTracer());
        IdMapper idMapper = input.idMapper();
        IdGenerator idGenerator = input.idGenerator();
        nodeRelationshipCache = new NodeRelationshipCache(AUTO, config.denseNodeThreshold());
        StatsProvider memoryUsageStats = new MemoryUsageStatsProvider(nodeRelationshipCache, idMapper);
        InputIterable<InputNode> nodes = input.nodes();
        InputIterable<InputRelationship> relationships = input.relationships();
        InputIterable<InputNode> cachedNodes = cachedForSure(nodes, inputCache.nodes(MAIN, true));
        InputIterable<InputRelationship> cachedRelationships = cachedForSure(relationships, inputCache.relationships(MAIN, true));
        RelationshipStore relationshipStore = neoStore.getRelationshipStore();

        // Stage 1 -- nodes, properties, labels
        NodeStage nodeStage = new NodeStage(config, writeMonitor, nodes, idMapper, idGenerator, neoStore, inputCache,
                neoStore.getLabelScanStore(), storeUpdateMonitor, nodeRelationshipCache, memoryUsageStats);
        executeStage(nodeStage);
        if (idMapper.needsPreparation()) {
            executeStage(new IdMapperPreparationStage(config, idMapper, cachedNodes, badCollector, memoryUsageStats));
            PrimitiveLongIterator duplicateNodeIds = badCollector.leftOverDuplicateNodesIds();
            if (duplicateNodeIds.hasNext()) {
                executeStage(new DeleteDuplicateNodesStage(config, duplicateNodeIds, neoStore));
            }
        }

        // Stage 2 -- calculate dense node threshold
        CalculateDenseNodesStage calculateDenseNodesStage = new CalculateDenseNodesStage(withBatchSize(config, config.batchSize() * 10),
                relationships, nodeRelationshipCache, idMapper, badCollector, inputCache, neoStore);
        executeStage(calculateDenseNodesStage);

        importRelationships(nodeRelationshipCache, storeUpdateMonitor, neoStore, writeMonitor, idMapper, cachedRelationships,
                inputCache, calculateDenseNodesStage.getRelationshipTypes(Long.MAX_VALUE), calculateDenseNodesStage.getRelationshipTypes(100));

        // Release this potentially really big piece of cached data
        long peakMemoryUsage = totalMemoryUsageOf(idMapper, nodeRelationshipCache);
        long highNodeId = nodeRelationshipCache.getHighNodeId();
        idMapper.close();
        idMapper = null;
        nodeRelationshipCache.close();
        nodeRelationshipCache = null;

        new RelationshipGroupDefragmenter(config, executionMonitor).run(max(max(peakMemoryUsage, highNodeId * 4), mebiBytes(1)), neoStore, highNodeId);

        // Stage 6 -- count nodes per label and labels per node
        nodeLabelsCache = new NodeLabelsCache(AUTO, neoStore.getLabelRepository().getHighId());
        memoryUsageStats = new MemoryUsageStatsProvider(nodeLabelsCache);
        executeStage(new NodeCountsStage(config, nodeLabelsCache, neoStore.getNodeStore(), neoStore.getLabelRepository().getHighId(),
                countsUpdater, memoryUsageStats));

        // Stage 7 -- count label-[type]->label
        executeStage(new RelationshipCountsStage(config, nodeLabelsCache, relationshipStore, neoStore.getLabelRepository().getHighId(),
                neoStore.getRelationshipTypeRepository().getHighId(), countsUpdater, AUTO));

        // We're done, do some final logging about it
        long totalTimeMillis = currentTimeMillis() - startTime;
        executionMonitor.done(totalTimeMillis, format("%n") + storeUpdateMonitor.toString() + format("%n") + "Peak memory usage: " + bytes(peakMemoryUsage));
        log.info("Import completed, took " + Format.duration(totalTimeMillis) + ". " + storeUpdateMonitor);
    } catch (Throwable t) {
        log.error("Error during import", t);
        throw Exceptions.launderedException(IOException.class, t);
    } finally {
        if (nodeRelationshipCache != null) {
            nodeRelationshipCache.close();
        }
        if (nodeLabelsCache != null) {
            nodeLabelsCache.close();
        }
    }
}
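The early idMapper.close()/null-out followed by the null-checking finally block is exactly the pattern the opening comment describes: release a large resource at a specific point so later stages do not pay for its memory, while still guaranteeing release if anything throws before that point. A generic, runnable sketch of the idiom, with an illustrative Cache type standing in for NodeRelationshipCache or IdMapper (none of these names are neo4j API):

class EagerReleaseExample {
    static class Cache implements AutoCloseable {
        @Override
        public void close() { System.out.println("cache closed"); }
    }

    static void runImportLikeFlow() {
        Cache cache = new Cache();
        try {
            // ... phase that needs the cache ...
            cache.close();   // eager release: the next phase shouldn't pay for this memory
            cache = null;    // mark as released so the finally block skips it
            // ... phase that must run without the cache's footprint ...
        } finally {
            if (cache != null) {
                cache.close();   // only reached if an exception skipped the eager close
            }
        }
    }

    public static void main(String[] args) {
        runImportLikeFlow();
    }
}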
use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.
the class EncodingIdMapperTest method shouldReportyProgressForSortAndDetect.
@Test
public void shouldReportyProgressForSortAndDetect() throws Exception {
    // GIVEN
    IdMapper idMapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    ProgressListener progress = mock(ProgressListener.class);
    idMapper.prepare(null, mock(Collector.class), progress);

    // WHEN
    long id = idMapper.get("123", GLOBAL);

    // THEN
    assertEquals(ID_NOT_FOUND, id);
    verify(progress, times(3)).started(anyString());
    verify(progress, times(3)).done();
}
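Nothing was put into the mapper here, so the lookup misses, and the three started/done pairs presumably correspond to the first three preparation phases named in the collision test below (SPLIT, SORT, DETECT), which counts seven once resolution phases also run. A sketch of the guard a caller would use around get(..), reusing idMapper, GLOBAL and ID_NOT_FOUND from this test:

long nodeId = idMapper.get("123", GLOBAL);
if (nodeId == ID_NOT_FOUND) {
    // "123" was never put(..) into the mapper; treat it as a missing node
}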
use of org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper in project neo4j by neo4j.
the class EncodingIdMapperTest method shouldCopeWithCollisionsBasedOnDifferentInputIds.
@Test
public void shouldCopeWithCollisionsBasedOnDifferentInputIds() throws Exception {
    // GIVEN
    Monitor monitor = mock(Monitor.class);
    Encoder encoder = mock(Encoder.class);
    when(encoder.encode(any())).thenReturn(12345L);
    IdMapper mapper = mapper(encoder, Radix.STRING, monitor);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        for (int i = 0; iterator.hasNext(); i++) {
            mapper.put(iterator.next(), i, GLOBAL);
        }
    }

    // WHEN
    ProgressListener progress = mock(ProgressListener.class);
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, progress);

    // THEN
    verifyNoMoreInteractions(collector);
    verify(monitor).numberOfCollisions(2);
    assertEquals(0L, mapper.get("10", GLOBAL));
    assertEquals(1L, mapper.get("9", GLOBAL));
    // 7 times since SPLIT+SORT+DETECT+RESOLVE+SPLIT+SORT+DEDUPLICATE
    verify(progress, times(7)).started(anyString());
    verify(progress, times(7)).done();
}
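Because the mocked Encoder maps every input to 12345L, the distinct ids "10" and "9" land on the same encoded value: the monitor sees two colliding entries, and the mapper falls back to the original input strings to tell them apart, so no duplicates reach the Collector and both lookups still resolve correctly. A simplified sketch of counting colliding entries in a sorted array of encodings (illustrative only, not the actual EncodingIdMapper internals):

// Counts entries whose encoding is shared with a neighbour; for
// { 12345, 12345 } this yields 2, matching numberOfCollisions(2) above.
static int countCollidingEntries(long[] sortedEncodings) {
    int colliding = 0;
    for (int i = 1; i < sortedEncodings.length; i++) {
        if (sortedEncodings[i] == sortedEncodings[i - 1]) {
            if (i == 1 || sortedEncodings[i - 1] != sortedEncodings[i - 2]) {
                colliding++;   // first member of this run of equal encodings
            }
            colliding++;       // current member of the run
        }
    }
    return colliding;
}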