Search in sources:

Example 11 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

The class ImportTool, method doImport.

/**
 * Runs a batch import of {@code input} into {@code storeDir} and performs the cleanup afterwards.
 *
 * <p>A log service rooted in {@code logsDir} is started, a {@link ParallelBatchImporter} is created,
 * an overview is printed to {@code out} and the import is executed. Whatever the outcome, the
 * input's bad-entry collector and {@code badOutput} are closed and the life support is shut down.
 * If the import did not complete, the store files in {@code storeDir} are deleted; a failure to
 * delete them is reported on {@code err} and does not propagate.
 *
 * @param out stream receiving the overview and the notice about skipped bad entries
 * @param err stream receiving error messages and, when enabled, stack traces
 * @param storeDir directory the store is imported into (and cleaned up from on failure)
 * @param logsDir directory handed to the store log service
 * @param badFile file whose absolute path is reported when bad entries were collected
 * @param fs file system abstraction used for logging, importing and store file deletion
 * @param nodesFiles node input files, used here only for the printed overview
 * @param relationshipsFiles relationship input files, used here only for the printed overview
 * @param enableStacktrace whether stack traces are printed in addition to error messages
 * @param input the input to import; also supplies the bad-entry collector
 * @param dbConfig database configuration passed to the importer
 * @param badOutput stream that is closed once the collector has been closed
 * @param configuration batch importer configuration
 * @throws IOException declared by this method; closing {@code badOutput} is one visible source
 */
public static void doImport(PrintStream out, PrintStream err, File storeDir, File logsDir, File badFile, FileSystemAbstraction fs, Collection<Option<File[]>> nodesFiles, Collection<Option<File[]>> relationshipsFiles, boolean enableStacktrace, Input input, Config dbConfig, OutputStream badOutput, org.neo4j.unsafe.impl.batchimport.Configuration configuration) throws IOException {
    LifeSupport life = new LifeSupport();
    LogService logService = life.add(StoreLogService.inLogsDirectory(fs, logsDir));
    life.start();
    //TODO: add file watcher here?
    BatchImporter importer = new ParallelBatchImporter(storeDir, fs, configuration, logService, ExecutionMonitors.defaultVisible(), dbConfig);
    printOverview(storeDir, nodesFiles, relationshipsFiles, configuration, out);
    // Only flipped once the import has run to completion; drives the store cleanup below.
    boolean success = false;
    try {
        importer.doImport(input);
        success = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, enableStacktrace, err);
    } finally {
        Collector collector = input.badCollector();
        // Read the count before closing the collector.
        int numberOfBadEntries = collector.badEntries();
        collector.close();
        badOutput.close();
        if (numberOfBadEntries > 0) {
            out.println("There were bad entries which were skipped and logged into " + badFile.getAbsolutePath());
        }
        life.shutdown();
        if (!success) {
            try {
                StoreFile.fileOperation(FileOperation.DELETE, fs, storeDir, null, Iterables.<StoreFile, StoreFile>iterable(StoreFile.values()), false, ExistingTargetStrategy.FAIL, StoreFileType.values());
            } catch (IOException e) {
                err.println("Unable to delete store files after an aborted import " + e);
                if (enableStacktrace) {
                    // Send the trace to the caller-supplied error stream, like the message above,
                    // rather than to System.err.
                    e.printStackTrace(err);
                }
            }
        }
    }
}
Also used : ParallelBatchImporter(org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter) BatchImporter(org.neo4j.unsafe.impl.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) LifeSupport(org.neo4j.kernel.lifecycle.LifeSupport) IOException(java.io.IOException) LogService(org.neo4j.kernel.impl.logging.LogService) StoreLogService(org.neo4j.kernel.impl.logging.StoreLogService) IllegalMultilineFieldException(org.neo4j.csv.reader.IllegalMultilineFieldException) MissingRelationshipDataException(org.neo4j.unsafe.impl.batchimport.input.MissingRelationshipDataException) DuplicateInputIdException(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.DuplicateInputIdException) InputException(org.neo4j.unsafe.impl.batchimport.input.InputException) Exceptions.launderedException(org.neo4j.helpers.Exceptions.launderedException) IOException(java.io.IOException)

Example 12 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

The class CsvInputTest, method shouldIgnoreEmptyExtraColumns.

@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN a two-column header and two rows that each carry values beyond those columns
    String csv = ":ID,one\n" + "1,test,\n" + "2,test,,additional";
    Iterable<DataFactory<InputNode>> nodeData = DataFactories.nodeData(CsvInputTest.<InputNode>data(csv));
    Collector extraColumnCollector = mock(Collector.class);
    int processors = getRuntime().availableProcessors();
    Input csvInput = new CsvInput(nodeData, defaultFormatNodeFileHeader(), null, null, IdType.INTEGER, config(COMMAS), extraColumnCollector, processors);

    // WHEN reading every node from the input
    try (InputIterator<InputNode> nodes = csvInput.nodes().iterator()) {
        // THEN both nodes come through with only the declared property
        InputNode first = nodes.next();
        assertNode(first, 1L, properties("one", "test"), labels());
        InputNode second = nodes.next();
        assertNode(second, 2L, properties("one", "test"), labels());
        assertFalse(nodes.hasNext());
    }

    // ... and each extra column was handed to the collector exactly once
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(1L), eq((String) null));
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(2L), eq((String) null));
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.silentBadCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.silentBadCollector) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 13 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

The class EncodingIdMapperTest, method shouldCopeWithCollisionsBasedOnDifferentInputIds.

@Test
public void shouldCopeWithCollisionsBasedOnDifferentInputIds() throws Exception {
    // GIVEN an encoder that returns the same encoded value for every input id
    Monitor collisionMonitor = mock(Monitor.class);
    Encoder collidingEncoder = mock(Encoder.class);
    when(collidingEncoder.encode(any())).thenReturn(12345L);
    IdMapper idMapper = mapper(collidingEncoder, Radix.STRING, collisionMonitor);
    InputIterable<Object> inputIds = wrap("source", Arrays.<Object>asList("10", "9"));
    try (ResourceIterator<Object> idIterator = inputIds.iterator()) {
        // Each input id is put with its position in the source as the node id
        int nodeId = 0;
        while (idIterator.hasNext()) {
            idMapper.put(idIterator.next(), nodeId, GLOBAL);
            nodeId++;
        }
    }

    // WHEN
    ProgressListener progressListener = mock(ProgressListener.class);
    Collector collisionCollector = mock(Collector.class);
    idMapper.prepare(inputIds, collisionCollector, progressListener);

    // THEN nothing is reported to the collector and both ids resolve to their own node id
    verifyNoMoreInteractions(collisionCollector);
    verify(collisionMonitor).numberOfCollisions(2);
    assertEquals(0L, idMapper.get("10", GLOBAL));
    assertEquals(1L, idMapper.get("9", GLOBAL));
    // 7 times since SPLIT+SORT+DETECT+RESOLVE+SPLIT+SORT,DEDUPLICATE
    verify(progressListener, times(7)).started(anyString());
    verify(progressListener, times(7)).done();
}
Also used : Monitor(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor) ProgressListener(org.neo4j.helpers.progress.ProgressListener) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 14 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

The class EncodingIdMapperTest, method shouldCopeWithMixedActualAndAccidentalCollisions.

@Test
public void shouldCopeWithMixedActualAndAccidentalCollisions() throws Exception {
    // GIVEN
    Monitor collisionMonitor = mock(Monitor.class);
    Encoder stubbedEncoder = mock(Encoder.class);
    // Distinct String instances (two of them with equal content) so that each one
    // can be referred to individually when stubbing the encoder and when looking ids up
    String firstA = new String("a");
    String idB = new String("b");
    String idC = new String("c");
    String secondA = new String("a");
    String idE = new String("e");
    String idF = new String("f");
    when(stubbedEncoder.encode(firstA)).thenReturn(1L);
    when(stubbedEncoder.encode(idB)).thenReturn(1L);
    when(stubbedEncoder.encode(idC)).thenReturn(3L);
    when(stubbedEncoder.encode(secondA)).thenReturn(1L);
    when(stubbedEncoder.encode(idE)).thenReturn(2L);
    when(stubbedEncoder.encode(idF)).thenReturn(1L);
    IdMapper idMapper = mapper(stubbedEncoder, Radix.STRING, collisionMonitor);
    InputIterable<Object> inputIds = wrap("source", Arrays.<Object>asList("a", "b", "c", "a", "e", "f"));
    Group.Adapter groupA = new Group.Adapter(1, "A");
    Group.Adapter groupB = new Group.Adapter(2, "B");
    // The first three ids go into group A, the last three into group B
    Group[] groupPerId = { groupA, groupA, groupA, groupB, groupB, groupB };

    // WHEN
    try (ResourceIterator<Object> idIterator = inputIds.iterator()) {
        int nodeId = 0;
        while (idIterator.hasNext()) {
            idMapper.put(idIterator.next(), nodeId, groupPerId[nodeId]);
            nodeId++;
        }
    }
    Collector collisionCollector = mock(Collector.class);
    idMapper.prepare(inputIds, collisionCollector, mock(ProgressListener.class));

    // THEN every id resolves, within its group, to the node id it was put with
    verify(collisionMonitor).numberOfCollisions(4);
    assertEquals(0L, idMapper.get(firstA, groupA));
    assertEquals(1L, idMapper.get(idB, groupA));
    assertEquals(2L, idMapper.get(idC, groupA));
    assertEquals(3L, idMapper.get(secondA, groupB));
    assertEquals(4L, idMapper.get(idE, groupB));
    assertEquals(5L, idMapper.get(idF, groupB));
}
Also used : Group(org.neo4j.unsafe.impl.batchimport.input.Group) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Monitor(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor) ProgressListener(org.neo4j.helpers.progress.ProgressListener) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) Test(org.junit.Test)

Example 15 with Collector

use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

The class ParallelInputEntityDeserializerTest, method shouldTreatExternalCloseAsPanic.

// The timeout bounds how long this test may run should the bug it guards against reappear;
// under normal circumstances the test finishes quickly.
@Test(timeout = 10_000)
public void shouldTreatExternalCloseAsPanic() throws Exception {
    // GIVEN enough data to fill up queues
    int entityCount = 500;
    Data<InputNode> nodeData = testData(entityCount);
    Configuration smallBufferConfig = new Configuration.Overridden(COMMAS) {

        @Override
        public int bufferSize() {
            return 100;
        }
    };
    IdType idType = ACTUAL;
    Collector collector = mock(Collector.class);
    Groups groups = new Groups();

    // WHEN a panic is delivered before all results have been consumed
    DeserializerFactory<InputNode> factory = defaultNodeDeserializer(groups, smallBufferConfig, idType, collector);
    try (ParallelInputEntityDeserializer<InputNode> deserializer = new ParallelInputEntityDeserializer<>(nodeData, defaultFormatNodeFileHeader(), smallBufferConfig, idType, 3, 3, deserializerFactory(factory), Validators.<InputNode>emptyValidator(), InputNode.class)) {
        deserializer.hasNext();
        deserializer.receivePanic(new RuntimeException());
        // Consume up to 100 further items, if any are still offered after the panic
        int consumed = 0;
        while (consumed < 100 && deserializer.hasNext()) {
            deserializer.next();
            consumed++;
        }
    } catch (TaskExecutionPanicException ignored) {
        // THEN it should be able to exit (this exception comes as a side effect)
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Groups(org.neo4j.unsafe.impl.batchimport.input.Groups) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) TaskExecutionPanicException(org.neo4j.unsafe.impl.batchimport.executor.TaskExecutionPanicException) Test(org.junit.Test)

Aggregations

Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)16 Test (org.junit.Test)12 Collectors.badCollector (org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector)9 IdMapper (org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper)7 InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)6 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)4 IOException (java.io.IOException)3 Monitor (org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor)3 BadCollector (org.neo4j.unsafe.impl.batchimport.input.BadCollector)3 Input (org.neo4j.unsafe.impl.batchimport.input.Input)3 BufferedOutputStream (java.io.BufferedOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 ArrayList (java.util.ArrayList)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrimitiveLongIterator (org.neo4j.collection.primitive.PrimitiveLongIterator)2 ProgressListener (org.neo4j.helpers.progress.ProgressListener)2 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)2 TaskExecutionPanicException (org.neo4j.unsafe.impl.batchimport.executor.TaskExecutionPanicException)2 Group (org.neo4j.unsafe.impl.batchimport.input.Group)2