Search in sources :

Example 1 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

the class CsvImporter method doImport.

/**
 * Builds a {@link CsvInput} from the configured node and relationship files and
 * hands it to {@link ImportTool} for import.
 *
 * Bad entries are routed to a {@link Collector} that writes to the report file
 * named by {@code reportFileName}; its tolerance is unlimited when
 * {@code isIgnoringSomething()} is true, otherwise zero.
 *
 * @throws IOException declared by the signature; propagated from the calls below
 */
@Override
public void doImport() throws IOException {
    final FileSystemAbstraction fileSystem = outsideWorld.fileSystem();
    final File databaseDir = config.get(DatabaseManagementSystemSettings.database_path);
    final File logsDirectory = config.get(GraphDatabaseSettings.logs_directory);

    // Report file and the buffered stream bad entries are written to.
    final File badEntriesReport = new File(reportFileName);
    final OutputStream badEntriesSink =
            new BufferedOutputStream(fileSystem.openAsOutputStream(badEntriesReport, false));
    final Collector collector = badCollector(
            badEntriesSink,
            isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0,
            collect(ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns));

    final Configuration importConfig = importConfiguration(null, false, config);
    final CsvInput csvInput = new CsvInput(
            nodeData(inputEncoding, nodesFiles),
            defaultFormatNodeFileHeader(),
            relationshipData(inputEncoding, relationshipsFiles),
            defaultFormatRelationshipFileHeader(),
            idType,
            csvConfiguration(args, false),
            collector,
            importConfig.maxNumberOfProcessors());

    // NOTE(review): the error stream is passed for both stream arguments, as in the original call.
    ImportTool.doImport(
            outsideWorld.errorStream(),
            outsideWorld.errorStream(),
            databaseDir,
            logsDirectory,
            badEntriesReport,
            fileSystem,
            nodesFiles,
            relationshipsFiles,
            false,
            csvInput,
            config,
            badEntriesSink,
            importConfig);
}
Also used : FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) ImportTool.importConfiguration(org.neo4j.tooling.ImportTool.importConfiguration) Configuration(org.neo4j.unsafe.impl.batchimport.Configuration) ImportTool.csvConfiguration(org.neo4j.tooling.ImportTool.csvConfiguration) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) File(java.io.File)

Example 2 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

the class CalculateDenseNodesStepTest method shouldCollectBadRelationships.

/**
 * Feeds a batch containing relationships with missing start and/or end nodes
 * through a {@link CalculateDenseNodesStep} and verifies that the collector is
 * told about each of the ids "a", "b", "c" and "d" exactly once.
 */
@Test
public void shouldCollectBadRelationships() throws Exception {
    // GIVEN
    NodeRelationshipCache relationshipCache = mock(NodeRelationshipCache.class);
    Collector collectorMock = mock(Collector.class);
    try (CalculateDenseNodesStep denseNodesStep =
            new CalculateDenseNodesStep(mock(StageControl.class), DEFAULT, relationshipCache, collectorMock)) {
        denseNodesStep.processors(4);
        denseNodesStep.start(0);

        // WHEN
        Batch<InputRelationship, RelationshipRecord> relationships = batch(
                relationship(1, 5),
                relationship(3, 10),
                // bad relationship with missing start node
                relationship("a", 2, -1, 2),
                // bad relationship with missing end node
                relationship(2, "b", 2, -1),
                // bad relationship with missing start and end node
                relationship("c", "d", -1, -1));
        denseNodesStep.receive(0, relationships);
        denseNodesStep.endOfUpstream();

        // Spin until the step reports that it has processed everything.
        for (;;) {
            if (denseNodesStep.isCompleted()) {
                break;
            }
        }

        // THEN
        String[] expectedBadIds = {"a", "b", "c", "d"};
        for (String badId : expectedBadIds) {
            // verify(mock) checks for exactly one invocation
            verify(collectorMock).collectBadRelationship(any(InputRelationship.class), eq(badId));
        }
    }
}
Also used : NodeRelationshipCache(org.neo4j.unsafe.impl.batchimport.cache.NodeRelationshipCache) StageControl(org.neo4j.unsafe.impl.batchimport.staging.StageControl) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) RelationshipRecord(org.neo4j.kernel.impl.store.record.RelationshipRecord) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Test(org.junit.Test)

Example 3 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

the class ExternalPropertiesDecoratorIT method shouldDecorateExternalPropertiesInParallelProcessingCsvInput.

/**
 * Reads nodes from a {@link CsvInput} whose node data is wrapped in a spied
 * {@link ExternalPropertiesDecorator}, and checks that every node carries the
 * decorated "extra" property, that exactly {@code nodeCount} nodes are produced,
 * and that the decorator is closed afterwards.
 */
@Test
public void shouldDecorateExternalPropertiesInParallelProcessingCsvInput() throws Exception {
    // GIVEN
    int processorCount = 5;
    Collector badCollector = mock(Collector.class);
    int nodeCount = 1000;
    Configuration csvConfig = new Configuration.Overridden(Configuration.COMMAS) {

        @Override
        public int bufferSize() {
            // 300 is empirically measured to roughly produce ~20 chunks
            return 300;
        }
    };
    IdType idType = IdType.STRING;
    Decorator<InputNode> externalProperties = spy(new ExternalPropertiesDecorator(
            data(NO_NODE_DECORATOR, () -> decoratedData(nodeCount)),
            defaultFormatNodeFileHeader(),
            csvConfig,
            idType,
            UpdateBehaviour.ADD,
            badCollector));
    Input csvInput = new CsvInput(
            nodeData(data(externalProperties, () -> mainData(nodeCount))),
            defaultFormatNodeFileHeader(),
            null,
            null,
            idType,
            csvConfig,
            badCollector,
            processorCount);

    // WHEN/THEN
    try (InputIterator<InputNode> nodeIterator = csvInput.nodes().iterator()) {
        int seenNodes = 0;
        while (seenNodes < nodeCount) {
            assertTrue(nodeIterator.hasNext());
            InputNode current = nodeIterator.next();
            // The "extra" property is supplied by the decorator
            assertHasProperty(current, "extra", current.id() + "-decorated");
            if (seenNodes == 0) {
                // Equivalent to a (non-existent) nodes.setProcessors( processors ):
                // processors(0) reads the current value, the delta is then applied.
                nodeIterator.processors(processorCount - nodeIterator.processors(0));
            }
            seenNodes++;
        }
        assertEquals(nodeCount, seenNodes);
        assertFalse(nodeIterator.hasNext());
    }
    verify(externalProperties).close();
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Test(org.junit.Test)

Example 4 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

the class InputGroupsDeserializerTest method shouldCoordinateGroupCreationForParallelProcessing.

/**
 * Deserializes one input source per available processor, each declaring its own
 * id group ("Group0", "Group1", ...), and then checks the state of the shared
 * {@link Groups} instance: a group created afterwards must receive the id
 * {@code processors}, and each of the "Group&lt;i&gt;" groups must have its own
 * distinct id.
 */
@Test
public void shouldCoordinateGroupCreationForParallelProcessing() throws Exception {
    // GIVEN
    List<DataFactory<InputNode>> data = new ArrayList<>();
    int processors = Runtime.getRuntime().availableProcessors();
    for (int i = 0; i < processors; i++) {
        // Header line declaring the group, followed by 100 single-column id rows
        StringBuilder builder = new StringBuilder(":ID(Group" + i + ")");
        for (int j = 0; j < 100; j++) {
            builder.append('\n').append(j);
        }
        data.add(data(builder.toString()));
    }
    Groups groups = new Groups();
    IdType idType = IdType.INTEGER;
    Collector badCollector = mock(Collector.class);
    Configuration config = lowBufferSize(COMMAS, false);
    DeserializerFactory<InputNode> factory = defaultNodeDeserializer(groups, config, idType, badCollector);
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), config, idType, processors, processors, factory, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        // WHEN
        count(deserializer);
    }
    // THEN
    assertEquals(processors, groups.getOrCreate("LastOne").id());
    // Track group ids (not loop indexes) so that two groups sharing an id is detected.
    // The previous version indexed by the loop variable and discarded the looked-up
    // group, which made the assertion impossible to fail.
    boolean[] seen = new boolean[processors];
    for (int i = 0; i < processors; i++) {
        String groupName = "Group" + i;
        int groupId = groups.getOrCreate(groupName).id();
        assertFalse(groupName + " shares id " + groupId + " with another group", seen[groupId]);
        seen[groupId] = true;
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) ArrayList(java.util.ArrayList) Groups(org.neo4j.unsafe.impl.batchimport.input.Groups) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Test(org.junit.Test)

Example 5 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

the class EncodingIdMapperTest method shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups.

/**
 * Puts the input id "10" into two different groups (plus "9" into the first)
 * and checks that preparing the mapper touches the collector not at all,
 * reports zero collisions to the monitor, and resolves each (id, group) pair
 * to the node id it was stored with.
 */
@Test
public void shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups() throws Exception {
    // GIVEN
    Monitor collisionMonitor = mock(Monitor.class);
    IdMapper idMapper = mapper(new StringEncoder(), Radix.STRING, collisionMonitor);
    InputIterable<Object> inputIds = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    Groups groups = new Groups();
    Group firstGroup = groups.getOrCreate("first");
    Group secondGroup = groups.getOrCreate("second");
    try (ResourceIterator<Object> idIterator = inputIds.iterator()) {
        // "10" and "9" go into the first group ...
        idMapper.put(idIterator.next(), 0, firstGroup);
        idMapper.put(idIterator.next(), 1, firstGroup);
        // ... and the second "10" into the second group
        idMapper.put(idIterator.next(), 2, secondGroup);
    }
    Collector badCollector = mock(Collector.class);
    idMapper.prepare(inputIds, badCollector, NONE);

    // WHEN/THEN
    verifyNoMoreInteractions(badCollector);
    verify(collisionMonitor).numberOfCollisions(0);
    assertEquals(0L, idMapper.get("10", firstGroup));
    assertEquals(1L, idMapper.get("9", firstGroup));
    assertEquals(2L, idMapper.get("10", secondGroup));
}
Also used : Group(org.neo4j.unsafe.impl.batchimport.input.Group) Monitor(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor) Groups(org.neo4j.unsafe.impl.batchimport.input.Groups) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Aggregations

Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)16 Test (org.junit.Test)12 Collectors.badCollector (org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector)9 IdMapper (org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper)7 InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)6 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)4 IOException (java.io.IOException)3 Monitor (org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor)3 BadCollector (org.neo4j.unsafe.impl.batchimport.input.BadCollector)3 Input (org.neo4j.unsafe.impl.batchimport.input.Input)3 BufferedOutputStream (java.io.BufferedOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 ArrayList (java.util.ArrayList)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrimitiveLongIterator (org.neo4j.collection.primitive.PrimitiveLongIterator)2 ProgressListener (org.neo4j.helpers.progress.ProgressListener)2 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)2 TaskExecutionPanicException (org.neo4j.unsafe.impl.batchimport.executor.TaskExecutionPanicException)2 Group (org.neo4j.unsafe.impl.batchimport.input.Group)2