Search in sources :

Example 1 with Collector

use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.

the class CsvImporter method doImport.

/**
 * Builds a {@link CsvInput} from the configured node and relationship data and hands it,
 * together with a collector for bad entries, to {@code doImport(input, collector)}.
 *
 * <p>When {@code force} is set, the database directory and the transaction logs directory
 * are deleted recursively before anything else happens. The report stream and the
 * collector are both closed when the import finishes, whether it succeeds or fails.
 *
 * @throws IOException declared by this method; presumably raised by the file-system
 *         calls or by the import itself (not visible from here)
 */
@Override
public void doImport() throws IOException {
    if (force) {
        // Forced import: clear out whatever is in the target locations first.
        fileSystem.deleteRecursively(databaseLayout.databaseDirectory());
        fileSystem.deleteRecursively(databaseLayout.getTransactionLogsDirectory());
    }
    try (OutputStream reportStream = fileSystem.openAsOutputStream(reportFile, false);
        Collector collector = getBadCollector(skipBadEntriesLogging, reportStream)) {
        // The time zone is read from the database configuration once; the supplier
        // then always answers with that same value.
        final ZoneId configuredZone = databaseConfig.get(GraphDatabaseSettings.db_temporal_timezone);
        final Supplier<ZoneId> zoneSupplier = () -> configuredZone;
        final var nodeSources = nodeData();
        final var relationshipSources = relationshipData();
        final var nodeHeader = defaultFormatNodeFileHeader(zoneSupplier, normalizeTypes);
        final var relationshipHeader = defaultFormatRelationshipFileHeader(zoneSupplier, normalizeTypes);
        final var progressMonitor = new CsvInput.PrintingMonitor(stdOut);
        CsvInput csvInput = new CsvInput(
                nodeSources,
                nodeHeader,
                relationshipSources,
                relationshipHeader,
                idType,
                csvConfig,
                progressMonitor,
                memoryTracker);
        doImport(csvInput, collector);
    }
}
Also used : ZoneId(java.time.ZoneId) OutputStream(java.io.OutputStream) Collector(org.neo4j.internal.batchimport.input.Collector) Collectors.badCollector(org.neo4j.internal.batchimport.input.Collectors.badCollector) Collectors.silentBadCollector(org.neo4j.internal.batchimport.input.Collectors.silentBadCollector) BadCollector(org.neo4j.internal.batchimport.input.BadCollector) CsvInput(org.neo4j.internal.batchimport.input.csv.CsvInput)

Example 2 with Collector

use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.

the class CsvInputTest method shouldIgnoreEmptyExtraColumns.

/**
 * Reads two node rows that carry more columns than the two-column header declares.
 * Expects both nodes to come out with only the header-declared property, and every
 * surplus column to be reported to the collector: {@code null} for an empty surplus
 * value, the raw text otherwise.
 */
@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN a two-column header; row 1 has one empty extra column, row 2 has an empty
    // extra column followed by a non-empty one
    String csv = ":ID,one\n" + "1,test,\n" + "2,test,,additional";
    Iterable<DataFactory> nodeSources = datas(CsvInputTest.data(csv));
    Collector badEntries = mock(Collector.class);
    // WHEN
    Input csvInput = new CsvInput(
            nodeSources,
            defaultFormatNodeFileHeader(),
            datas(),
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            config(),
            NO_MONITOR,
            INSTANCE);
    // THEN both nodes are produced without the surplus values
    try (InputIterator nodeIterator = csvInput.nodes(badEntries).iterator()) {
        assertNextNode(nodeIterator, 1L, properties("one", "test"), labels());
        assertNextNode(nodeIterator, 2L, properties("one", "test"), labels());
        assertFalse(readNext(nodeIterator));
    }
    // ... and each surplus column was handed to the collector
    verify(badEntries).collectExtraColumns(anyString(), eq(1L), eq(null));
    verify(badEntries).collectExtraColumns(anyString(), eq(2L), eq(null));
    verify(badEntries).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Collector(org.neo4j.internal.batchimport.input.Collector) Test(org.junit.Test)

Example 3 with Collector

use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldHandleLargeAmountsOfDuplicateNodeIds.

/**
 * Puts each of ten distinct input ids into the mapper twice and expects
 * {@code prepare} to report exactly ten duplicates to the collector, with the same
 * number of left-over duplicate node ids afterwards.
 */
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() {
    // GIVEN
    IdMapper idMapper = mapper(new LongEncoder(), Radix.LONG, EncodingIdMapper.NO_MONITOR);
    int distinctIdCount = 10;
    // Two identical passes, each alternating between the top and the bottom of the
    // id range (9, 0, 8, 1, ...), so that every id appears exactly twice
    List<Object> inputIds = new ArrayList<>();
    for (int pass = 0; pass < 2; pass++) {
        for (long low = 0; low < distinctIdCount / 2; low++) {
            inputIds.add(distinctIdCount - (low + 1));
            inputIds.add(low);
        }
    }
    // Node ids are handed out sequentially in list order
    long nextNodeId = 0;
    for (Object inputId : inputIds) {
        idMapper.put(inputId, nextNodeId, Group.GLOBAL);
        nextNodeId++;
    }
    // WHEN
    Collector duplicates = mock(Collector.class);
    idMapper.prepare(values(inputIds.toArray()), duplicates, NONE);
    // THEN
    verify(duplicates, times(distinctIdCount)).collectDuplicateNode(any(Object.class), anyLong(), anyString());
    assertEquals(distinctIdCount, count(idMapper.leftOverDuplicateNodesIds()));
}
Also used : ArrayList(java.util.ArrayList) Collector(org.neo4j.internal.batchimport.input.Collector) IdMapper(org.neo4j.internal.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 4 with Collector

use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups.

/**
 * Puts {@code idsPerGroup * groupCount} ids, spread over several groups, into a mapper
 * whose encoder is steered by the test, then checks that {@code prepare} reports nothing
 * to the collector, that every id maps back to its node id within its group, and that no
 * duplicate node ids are left over.
 *
 * <p>The lookup lambda has side effects the rest of the test depends on: each call
 * records the group of the looked-up node in {@code group} and tells the encoder which
 * value to use. Statement order and argument order therefore matter here.
 */
@Test
public void shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups() {
    // GIVEN
    // Encoder wrapper whose output the lookup below dictates
    // (presumably it overrides what the wrapped LongEncoder would produce - confirm in ControlledEncoder)
    final ControlledEncoder encoder = new ControlledEncoder(new LongEncoder());
    final int idsPerGroup = 20;
    int groupCount = 5;
    for (int i = 0; i < groupCount; i++) {
        groups.getOrCreate("Group " + i);
    }
    IdMapper mapper = mapper(encoder, Radix.LONG, EncodingIdMapper.NO_MONITOR, ParallelSort.DEFAULT, numberOfCollisions -> new LongCollisionValues(NumberArrayFactories.HEAP, numberOfCollisions, INSTANCE));
    // Holds the group of the most recently looked-up node id; written by the lambda below
    final AtomicReference<Group> group = new AtomicReference<>();
    PropertyValueLookup ids = (nodeId, cursorContext) -> {
        // Node ids are laid out group by group, idsPerGroup ids in each
        int groupId = toIntExact(nodeId / idsPerGroup);
        if (groupId == groupCount) {
            // First node id past the last group: there is no value to return
            return null;
        }
        group.set(groups.get(groupId));
        // i.e. the first 10% of ids in each group collide with the first 10% in every other group
        if (nodeId % idsPerGroup < 2) {
            // Let these colliding values encode into the same eId as well,
            // so that they are definitely marked as collisions
            encoder.useThisIdToEncodeNoMatterWhatComesIn(1234567L);
            // The input id is the position within the group, so it repeats across groups
            return nodeId % idsPerGroup;
        }
        // The other 90% will be accidental collisions for something else:
        // one shared encoded value per group, while the input ids themselves are unique
        encoder.useThisIdToEncodeNoMatterWhatComesIn((long) (123456 - group.get().id()));
        return nodeId;
    };
    // WHEN
    int count = idsPerGroup * groupCount;
    for (long nodeId = 0; nodeId < count; nodeId++) {
        // Arguments are evaluated left to right, so lookupProperty has already set
        // `group` for this node id by the time group.get() is read
        mapper.put(ids.lookupProperty(nodeId, NULL), nodeId, group.get());
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);
    // THEN
    // prepare must not have reported anything to the collector
    verifyNoMoreInteractions(collector);
    for (long nodeId = 0; nodeId < count; nodeId++) {
        // Same left-to-right dependency as in the put loop above
        assertEquals(nodeId, mapper.get(ids.lookupProperty(nodeId, NULL), group.get()));
    }
    // ... and the lookups must not have reported anything either
    verifyNoMoreInteractions(collector);
    assertFalse(mapper.leftOverDuplicateNodesIds().hasNext());
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) NO_MONITOR(org.neo4j.internal.batchimport.cache.idmapping.string.EncodingIdMapper.NO_MONITOR) NumberArrayFactories(org.neo4j.internal.batchimport.cache.NumberArrayFactories) Collector(org.neo4j.internal.batchimport.input.Collector) CursorContext(org.neo4j.io.pagecache.context.CursorContext) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) Groups(org.neo4j.internal.batchimport.input.Groups) Random(java.util.Random) DefaultPageCacheTracer(org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RandomRule(org.neo4j.test.rule.RandomRule) Mockito.verifyNoMoreInteractions(org.mockito.Mockito.verifyNoMoreInteractions) MutableLong(org.apache.commons.lang3.mutable.MutableLong) IdMapper(org.neo4j.internal.batchimport.cache.idmapping.IdMapper) PageCacheTracer(org.neo4j.io.pagecache.tracing.PageCacheTracer) NULL(org.neo4j.io.pagecache.context.CursorContext.NULL) Assert.fail(org.junit.Assert.fail) Math.toIntExact(java.lang.Math.toIntExact) ProgressListener(org.neo4j.internal.helpers.progress.ProgressListener) NONE(org.neo4j.internal.helpers.progress.ProgressListener.NONE) Parameterized(org.junit.runners.Parameterized) LongFunction(java.util.function.LongFunction) PropertyValueLookup(org.neo4j.internal.batchimport.PropertyValueLookup) Collection(java.util.Collection) Set(java.util.Set) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) UUID(java.util.UUID) Mockito.when(org.mockito.Mockito.when) Mockito.verify(org.mockito.Mockito.verify) AtomicLong(java.util.concurrent.atomic.AtomicLong) Factory(org.neo4j.function.Factory) GLOBAL(org.neo4j.internal.batchimport.input.Group.GLOBAL) List(java.util.List) INSTANCE(org.neo4j.memory.EmptyMemoryTracker.INSTANCE) Rule(org.junit.Rule) 
Group(org.neo4j.internal.batchimport.input.Group) Assert.assertFalse(org.junit.Assert.assertFalse) Race(org.neo4j.test.Race) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Mockito.mock(org.mockito.Mockito.mock) PrimitiveLongCollections.count(org.neo4j.collection.PrimitiveLongCollections.count) Group(org.neo4j.internal.batchimport.input.Group) PropertyValueLookup(org.neo4j.internal.batchimport.PropertyValueLookup) Collector(org.neo4j.internal.batchimport.input.Collector) IdMapper(org.neo4j.internal.batchimport.cache.idmapping.IdMapper) AtomicReference(java.util.concurrent.atomic.AtomicReference) Test(org.junit.Test)

Example 5 with Collector

use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups.

/**
 * Puts the input id {@code "10"} into two different groups (plus {@code "9"} into the
 * first one) and expects no collector interaction, zero collisions reported to the
 * monitor, and each (input id, group) pair resolving to its own node id.
 */
@Test
public void shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups() {
    // GIVEN
    EncodingIdMapper.Monitor collisionMonitor = mock(EncodingIdMapper.Monitor.class);
    Group first = groups.getOrCreate("first");
    Group second = groups.getOrCreate("second");
    IdMapper idMapper = mapper(new StringEncoder(), Radix.STRING, collisionMonitor);
    PropertyValueLookup inputIds = values("10", "9", "10");
    // Node ids 0 and 1 ("10" and "9") belong to the first group
    for (int nodeId = 0; nodeId < 2; nodeId++) {
        idMapper.put(inputIds.lookupProperty(nodeId, NULL), nodeId, first);
    }
    // Node id 2 reuses input id "10", but in the second group
    idMapper.put(inputIds.lookupProperty(2, NULL), 2, second);
    Collector duplicates = mock(Collector.class);
    idMapper.prepare(inputIds, duplicates, NONE);
    // WHEN/THEN
    verifyNoMoreInteractions(duplicates);
    verify(collisionMonitor).numberOfCollisions(0);
    assertEquals(0L, idMapper.get("10", first));
    assertEquals(1L, idMapper.get("9", first));
    assertEquals(2L, idMapper.get("10", second));
    assertFalse(idMapper.leftOverDuplicateNodesIds().hasNext());
}
Also used : Group(org.neo4j.internal.batchimport.input.Group) PropertyValueLookup(org.neo4j.internal.batchimport.PropertyValueLookup) Collector(org.neo4j.internal.batchimport.input.Collector) IdMapper(org.neo4j.internal.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Aggregations

Collector (org.neo4j.internal.batchimport.input.Collector)9 Test (org.junit.Test)8 IdMapper (org.neo4j.internal.batchimport.cache.idmapping.IdMapper)7 PropertyValueLookup (org.neo4j.internal.batchimport.PropertyValueLookup)6 Group (org.neo4j.internal.batchimport.input.Group)4 ArrayList (java.util.ArrayList)3 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)3 ProgressListener (org.neo4j.internal.helpers.progress.ProgressListener)3 Math.toIntExact (java.lang.Math.toIntExact)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Random (java.util.Random)2 Set (java.util.Set)2 UUID (java.util.UUID)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 LongFunction (java.util.function.LongFunction)2 MutableLong (org.apache.commons.lang3.mutable.MutableLong)2 Assert.assertEquals (org.junit.Assert.assertEquals)2