Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class ImportTool, method doImport.
public static void doImport(PrintStream out, PrintStream err, File storeDir, File logsDir, File badFile,
        FileSystemAbstraction fs, Collection<Option<File[]>> nodesFiles,
        Collection<Option<File[]>> relationshipsFiles, boolean enableStacktrace, Input input, Config dbConfig,
        OutputStream badOutput, org.neo4j.unsafe.impl.batchimport.Configuration configuration) throws IOException {
    boolean success;
    LifeSupport life = new LifeSupport();
    LogService logService = life.add(StoreLogService.inLogsDirectory(fs, logsDir));
    life.start();
    // TODO: add file watcher here?
    BatchImporter importer = new ParallelBatchImporter(storeDir, fs, configuration, logService,
            ExecutionMonitors.defaultVisible(), dbConfig);
    printOverview(storeDir, nodesFiles, relationshipsFiles, configuration, out);
    success = false;
    try {
        importer.doImport(input);
        success = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, enableStacktrace, err);
    } finally {
        Collector collector = input.badCollector();
        int numberOfBadEntries = collector.badEntries();
        collector.close();
        badOutput.close();
        if (numberOfBadEntries > 0) {
            out.println("There were bad entries which were skipped and logged into " + badFile.getAbsolutePath());
        }
        life.shutdown();
        if (!success) {
            try {
                StoreFile.fileOperation(FileOperation.DELETE, fs, storeDir, null,
                        Iterables.<StoreFile, StoreFile>iterable(StoreFile.values()), false,
                        ExistingTargetStrategy.FAIL, StoreFileType.values());
            } catch (IOException e) {
                err.println("Unable to delete store files after an aborted import " + e);
                if (enableStacktrace) {
                    e.printStackTrace();
                }
            }
        }
    }
}
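The finally block above shows the Collector lifecycle the import tool relies on: read badEntries() before close(), then close the underlying stream. Below is a minimal standalone sketch of that lifecycle, assuming a Collectors.badCollector(OutputStream, int) factory from the same batch import package; the tolerance value and the bad.log path are illustrative, and collectExtraColumns has the (source, row, value) signature exercised by the CsvInputTest snippet further down.

OutputStream badOutput = new BufferedOutputStream(new FileOutputStream("bad.log"));
Collector collector = Collectors.badCollector(badOutput, 1000); // assumed factory; tolerate up to 1000 bad entries
try {
    // An importer reports problems as it parses, e.g. an extra CSV column:
    collector.collectExtraColumns("nodes.csv", 2L, "additional");
} finally {
    int numberOfBadEntries = collector.badEntries();
    collector.close();
    badOutput.close();
    if (numberOfBadEntries > 0) {
        System.out.println(numberOfBadEntries + " bad entries were skipped and logged into bad.log");
    }
}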
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class CsvInputTest, method shouldIgnoreEmptyExtraColumns.
@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN
    Iterable<DataFactory<InputNode>> data = DataFactories.nodeData(
            CsvInputTest.<InputNode>data(":ID,one\n" + "1,test,\n" + "2,test,,additional"));
    // WHEN
    Collector collector = mock(Collector.class);
    Input input = new CsvInput(data, defaultFormatNodeFileHeader(), null, null, IdType.INTEGER,
            config(COMMAS), collector, getRuntime().availableProcessors());
    // THEN
    try (InputIterator<InputNode> nodes = input.nodes().iterator()) {
        assertNode(nodes.next(), 1L, properties("one", "test"), labels());
        assertNode(nodes.next(), 2L, properties("one", "test"), labels());
        assertFalse(nodes.hasNext());
    }
    verify(collector, times(1)).collectExtraColumns(anyString(), eq(1L), eq((String) null));
    verify(collector, times(1)).collectExtraColumns(anyString(), eq(2L), eq((String) null));
    verify(collector, times(1)).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
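The header ":ID,one" declares two columns, so the trailing values in "1,test," and "2,test,,additional" are extra; note that empty extras are still reported to the collector, with a null value. Here is a hedged variant of the same setup using a real collector instead of a mock, assuming the Collectors.badCollector(OutputStream, int) factory and reusing the test-local data(...) and config(...) helpers shown above.

Iterable<DataFactory<InputNode>> data = DataFactories.nodeData(
        data(":ID,one\n" + "1,test,\n" + "2,test,,additional"));
Collector collector = Collectors.badCollector(System.out, 10); // assumed factory; tolerate up to 10 bad entries
Input input = new CsvInput(data, defaultFormatNodeFileHeader(), null, null, IdType.INTEGER,
        config(COMMAS), collector, getRuntime().availableProcessors());
try (InputIterator<InputNode> nodes = input.nodes().iterator()) {
    while (nodes.hasNext()) {
        nodes.next(); // extra-column reports reach the collector as rows are parsed
    }
}
System.out.println(collector.badEntries() + " extra-column entries reported");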
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldCopeWithCollisionsBasedOnDifferentInputIds.
@Test
public void shouldCopeWithCollisionsBasedOnDifferentInputIds() throws Exception {
    // GIVEN
    Monitor monitor = mock(Monitor.class);
    Encoder encoder = mock(Encoder.class);
    when(encoder.encode(any())).thenReturn(12345L);
    IdMapper mapper = mapper(encoder, Radix.STRING, monitor);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        for (int i = 0; iterator.hasNext(); i++) {
            mapper.put(iterator.next(), i, GLOBAL);
        }
    }
    // WHEN
    ProgressListener progress = mock(ProgressListener.class);
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, progress);
    // THEN
    verifyNoMoreInteractions(collector);
    verify(monitor).numberOfCollisions(2);
    assertEquals(0L, mapper.get("10", GLOBAL));
    assertEquals(1L, mapper.get("9", GLOBAL));
    // 7 times, since the prepare phases are SPLIT+SORT+DETECT+RESOLVE+SPLIT+SORT+DEDUPLICATE
    verify(progress, times(7)).started(anyString());
    verify(progress, times(7)).done();
}
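The mocked encoder maps both "10" and "9" to the same code 12345, so prepare(...) detects two collisions; because the underlying input ids differ, the mapper resolves them itself and nothing is reported to the collector. A minimal sketch of the same put/prepare/get contract with a real encoder follows; StringEncoder and EncodingIdMapper.NO_MONITOR are assumed to exist in the same id-mapping package, and mapper(...) and wrap(...) are the test-local helpers used above.

IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, EncodingIdMapper.NO_MONITOR); // assumed encoder and monitor
InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("person:1", "person:2"));
try (ResourceIterator<Object> iterator = ids.iterator()) {
    for (int i = 0; iterator.hasNext(); i++) {
        mapper.put(iterator.next(), i, GLOBAL); // node id i for each input id
    }
}
mapper.prepare(ids, mock(Collector.class), mock(ProgressListener.class));
assertEquals(0L, mapper.get("person:1", GLOBAL));
assertEquals(1L, mapper.get("person:2", GLOBAL));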
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldCopeWithMixedActualAndAccidentalCollisions.
@Test
public void shouldCopeWithMixedActualAndAccidentalCollisions() throws Exception {
    // GIVEN
    Monitor monitor = mock(Monitor.class);
    Encoder encoder = mock(Encoder.class);
    // Create these explicit instances so that they can be used in mocks, even for equal values
    String a = new String("a");
    String b = new String("b");
    String c = new String("c");
    String a2 = new String("a");
    String e = new String("e");
    String f = new String("f");
    when(encoder.encode(a)).thenReturn(1L);
    when(encoder.encode(b)).thenReturn(1L);
    when(encoder.encode(c)).thenReturn(3L);
    when(encoder.encode(a2)).thenReturn(1L);
    when(encoder.encode(e)).thenReturn(2L);
    when(encoder.encode(f)).thenReturn(1L);
    IdMapper mapper = mapper(encoder, Radix.STRING, monitor);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("a", "b", "c", "a", "e", "f"));
    Group.Adapter groupA = new Group.Adapter(1, "A");
    Group.Adapter groupB = new Group.Adapter(2, "B");
    Group[] groups = new Group[] { groupA, groupA, groupA, groupB, groupB, groupB };
    // WHEN
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        for (int i = 0; iterator.hasNext(); i++) {
            mapper.put(iterator.next(), i, groups[i]);
        }
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, mock(ProgressListener.class));
    // THEN
    verify(monitor).numberOfCollisions(4);
    assertEquals(0L, mapper.get(a, groupA));
    assertEquals(1L, mapper.get(b, groupA));
    assertEquals(2L, mapper.get(c, groupA));
    assertEquals(3L, mapper.get(a2, groupB));
    assertEquals(4L, mapper.get(e, groupB));
    assertEquals(5L, mapper.get(f, groupB));
}
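The groups are what separate the actual duplicate ("a" appearing twice) from the accidental collisions (b and f merely encode to the same value as a): a and a2 live in different groups, so they map to different nodes instead of being reported to the collector as duplicates. A short sketch of that scoping rule, reusing the Group.Adapter(id, name) constructor and a mapper prepared as in the test above (the put/prepare steps mirror the loop shown there):

Group people = new Group.Adapter(1, "People");
Group cities = new Group.Adapter(2, "Cities");
mapper.put("1", 0, people);
mapper.put("1", 1, cities); // same input id in another group: a distinct node, not a duplicate
mapper.prepare(ids, collector, progress);
assertEquals(0L, mapper.get("1", people));
assertEquals(1L, mapper.get("1", cities));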
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class ParallelInputEntityDeserializerTest, method shouldTreatExternalCloseAsPanic.
// The timeout is so that if this bug strikes again it will only cause this test to run for a limited time
// before failing. Normally this test is really quick.
@Test(timeout = 10_000)
public void shouldTreatExternalCloseAsPanic() throws Exception {
    // GIVEN enough data to fill up queues
    int entities = 500;
    Data<InputNode> data = testData(entities);
    Configuration config = new Configuration.Overridden(COMMAS) {
        @Override
        public int bufferSize() {
            return 100;
        }
    };
    IdType idType = ACTUAL;
    Collector badCollector = mock(Collector.class);
    Groups groups = new Groups();
    // WHEN closing before having consumed all results
    DeserializerFactory<InputNode> deserializerFactory = defaultNodeDeserializer(groups, config, idType, badCollector);
    try (ParallelInputEntityDeserializer<InputNode> deserializer = new ParallelInputEntityDeserializer<>(data,
            defaultFormatNodeFileHeader(), config, idType, 3, 3, deserializerFactory,
            Validators.<InputNode>emptyValidator(), InputNode.class)) {
        deserializer.hasNext();
        deserializer.receivePanic(new RuntimeException());
        // Consume some items so that a worker which has processed items goes ahead
        // and offers its result, at which point it notices the panic.
        for (int i = 0; i < 100 && deserializer.hasNext(); i++) {
            deserializer.next();
        }
    } catch (TaskExecutionPanicException e) {
        // THEN it should be able to exit (this exception comes as a side effect)
    }
}
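receivePanic(...) is what an external close maps to: it poisons the worker threads, and the panic resurfaces on the consuming thread as a TaskExecutionPanicException. Below is a hedged variant where the panic arrives from another thread mid-iteration; it reuses the test-local testData(...) and defaultNodeDeserializer(...) helpers and the constructor arguments from the test above, with a plain java.util.concurrent executor standing in for the external closer.

Data<InputNode> data = testData(500);
DeserializerFactory<InputNode> factory = defaultNodeDeserializer(new Groups(), COMMAS, ACTUAL, mock(Collector.class));
ExecutorService panicker = Executors.newSingleThreadExecutor();
try (ParallelInputEntityDeserializer<InputNode> deserializer = new ParallelInputEntityDeserializer<>(data,
        defaultFormatNodeFileHeader(), COMMAS, ACTUAL, 3, 3, factory,
        Validators.<InputNode>emptyValidator(), InputNode.class)) {
    panicker.submit(() -> deserializer.receivePanic(new RuntimeException("external close")));
    while (deserializer.hasNext()) {
        deserializer.next();
    }
} catch (TaskExecutionPanicException expected) {
    // the panic is rethrown on the consuming thread
} finally {
    panicker.shutdown();
}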