Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class CsvImporter, method doImport.
/**
 * Runs a CSV import of the configured node and relationship files.
 *
 * <p>Store and logs directories are read from {@code config}. Bad entries are reported to a
 * {@link Collector} writing to {@code reportFileName}; its tolerance is
 * {@link BadCollector#UNLIMITED_TOLERANCE} when {@code isIgnoringSomething()} is true and
 * {@code 0} otherwise. The outside world's error stream is passed for both stream arguments of
 * {@code ImportTool.doImport}.
 *
 * @throws IOException if the report file cannot be opened, or if the import itself fails with an
 *         I/O error.
 */
@Override
public void doImport() throws IOException {
    FileSystemAbstraction fs = outsideWorld.fileSystem();
    File storeDir = config.get(DatabaseManagementSystemSettings.database_path);
    File logsDir = config.get(GraphDatabaseSettings.logs_directory);
    File reportFile = new File(reportFileName);
    OutputStream badOutput = new BufferedOutputStream(fs.openAsOutputStream(reportFile, false));
    // Tracks whether the stream has been passed on to ImportTool.doImport. Until then this method
    // is the only holder of the stream and must close it if the set-up below throws.
    boolean handedOver = false;
    try {
        Collector badCollector = badCollector(badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0, collect(ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns));
        Configuration configuration = importConfiguration(null, false, config);
        CsvInput input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(), relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(), idType, csvConfiguration(args, false), badCollector, configuration.maxNumberOfProcessors());
        // NOTE(review): assumes ImportTool.doImport takes over responsibility for closing
        // badOutput once called - confirm against ImportTool.
        handedOver = true;
        ImportTool.doImport(outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs, nodesFiles, relationshipsFiles, false, input, config, badOutput, configuration);
    } finally {
        if (!handedOver) {
            badOutput.close();
        }
    }
}
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class CalculateDenseNodesStepTest, method shouldCollectBadRelationships.
/**
 * Feeds a batch containing both good and bad relationships through a
 * {@link CalculateDenseNodesStep} and verifies that the {@link Collector} is told exactly once
 * about each of the unresolved ids "a", "b", "c" and "d".
 *
 * <p>Waits for the step to complete by polling with a short sleep, and fails after a fixed
 * deadline instead of spinning forever if the step never completes.
 */
@Test
public void shouldCollectBadRelationships() throws Exception {
    // GIVEN
    NodeRelationshipCache cache = mock(NodeRelationshipCache.class);
    Collector collector = mock(Collector.class);
    try (CalculateDenseNodesStep step = new CalculateDenseNodesStep(mock(StageControl.class), DEFAULT, cache, collector)) {
        step.processors(4);
        step.start(0);
        // WHEN
        Batch<InputRelationship, RelationshipRecord> batch = batch(
                relationship(1, 5),
                relationship(3, 10),
                // bad relationship with missing start node
                relationship("a", 2, -1, 2),
                // bad relationship with missing end node
                relationship(2, "b", 2, -1),
                // bad relationship with missing start and end node
                relationship("c", "d", -1, -1));
        step.receive(0, batch);
        step.endOfUpstream();
        // Poll for completion; a bounded wait turns a stuck step into a test failure
        // rather than a hung build.
        final long timeoutNanos = 60_000_000_000L;
        final long waitStart = System.nanoTime();
        while (!step.isCompleted()) {
            if (System.nanoTime() - waitStart > timeoutNanos) {
                throw new AssertionError("Step did not complete within 60 seconds");
            }
            Thread.sleep(10);
        }
        // THEN
        verify(collector, times(1)).collectBadRelationship(any(InputRelationship.class), eq("a"));
        verify(collector, times(1)).collectBadRelationship(any(InputRelationship.class), eq("b"));
        verify(collector, times(1)).collectBadRelationship(any(InputRelationship.class), eq("c"));
        verify(collector, times(1)).collectBadRelationship(any(InputRelationship.class), eq("d"));
    }
}
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class ExternalPropertiesDecoratorIT, method shouldDecorateExternalPropertiesInParallelProcessingCsvInput.
/**
 * Reads {@code count} nodes from a {@link CsvInput} whose node data is decorated by a spied
 * {@link ExternalPropertiesDecorator}, checking that every node carries the decorated
 * {@code "extra"} property, that exactly {@code count} nodes are produced, and that the
 * decorator is closed afterwards. The processor count is adjusted after the first node is read.
 */
@Test
public void shouldDecorateExternalPropertiesInParallelProcessingCsvInput() throws Exception {
    // GIVEN
    final int processors = 5;
    Collector collector = mock(Collector.class);
    final int count = 1000;
    Configuration config = new Configuration.Overridden(Configuration.COMMAS) {
        @Override
        public int bufferSize() {
            // 300 is empirically measured to roughly produce ~20 chunks
            return 300;
        }
    };
    IdType idType = IdType.STRING;
    ExternalPropertiesDecorator externalProperties = new ExternalPropertiesDecorator(
            data(NO_NODE_DECORATOR, () -> decoratedData(count)),
            defaultFormatNodeFileHeader(), config, idType, UpdateBehaviour.ADD, collector);
    Decorator<InputNode> decorator = spy(externalProperties);
    Input input = new CsvInput(
            nodeData(data(decorator, () -> mainData(count))),
            defaultFormatNodeFileHeader(), null, null, idType, config, collector, processors);

    // WHEN/THEN
    try (InputIterator<InputNode> nodes = input.nodes().iterator()) {
        int checked = 0;
        while (checked < count) {
            assertTrue(nodes.hasNext());
            InputNode node = nodes.next();
            // The "extra" property is contributed by the decorator
            assertHasProperty(node, "extra", node.id() + "-decorated");
            if (checked == 0) {
                // Equivalent to a (non-existent) nodes.setProcessors( processors ):
                // apply the difference between the wanted and the current processor count.
                int currentProcessors = nodes.processors(0);
                nodes.processors(processors - currentProcessors);
            }
            checked++;
        }
        assertEquals(count, checked);
        assertFalse(nodes.hasNext());
    }
    verify(decorator).close();
}
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class InputGroupsDeserializerTest, method shouldCoordinateGroupCreationForParallelProcessing.
/**
 * Deserializes one data source per available processor, each declaring its own id group
 * ({@code Group0}, {@code Group1}, ...), and then checks the resulting {@link Groups} state:
 * a group created afterwards gets the id {@code processors}, and no two of the declared groups
 * share an id.
 */
@Test
public void shouldCoordinateGroupCreationForParallelProcessing() throws Exception {
    // GIVEN
    List<DataFactory<InputNode>> data = new ArrayList<>();
    int processors = Runtime.getRuntime().availableProcessors();
    for (int i = 0; i < processors; i++) {
        // Header declaring the group, followed by 100 single-column rows with ids 0..99
        StringBuilder builder = new StringBuilder(":ID(Group" + i + ")");
        for (int j = 0; j < 100; j++) {
            builder.append('\n').append(j);
        }
        data.add(data(builder.toString()));
    }
    Groups groups = new Groups();
    IdType idType = IdType.INTEGER;
    Collector badCollector = mock(Collector.class);
    Configuration config = lowBufferSize(COMMAS, false);
    DeserializerFactory<InputNode> factory = defaultNodeDeserializer(groups, config, idType, badCollector);
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), config, idType, processors, processors, factory, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        // WHEN
        count(deserializer);
    }
    // THEN
    assertEquals(processors, groups.getOrCreate("LastOne").id());
    // Track ids by the id each group actually received, so that two groups sharing an id
    // are detected. Indexing by the loop counter would make this check unable to fail.
    boolean[] seen = new boolean[processors];
    for (int i = 0; i < processors; i++) {
        String groupName = "Group" + i;
        int groupId = groups.getOrCreate(groupName).id();
        assertFalse("Id " + groupId + " of " + groupName + " is already used by another group", seen[groupId]);
        seen[groupId] = true;
    }
}
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups.
/**
 * Puts the input id "10" into two different groups (plus "9" into the first one) and checks
 * that preparing the mapper reports nothing to the {@link Collector}, reports zero collisions
 * to the {@link Monitor}, and that each (id, group) pair resolves to the node id it was put with.
 */
@Test
public void shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups() throws Exception {
    // GIVEN
    Monitor monitor = mock(Monitor.class);
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, monitor);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    Groups groups = new Groups();
    Group groupOne = groups.getOrCreate("first");
    Group groupTwo = groups.getOrCreate("second");
    try (ResourceIterator<Object> inputIds = ids.iterator()) {
        // first group: "10" -> 0, "9" -> 1
        mapper.put(inputIds.next(), 0, groupOne);
        mapper.put(inputIds.next(), 1, groupOne);
        // second group: "10" -> 2
        mapper.put(inputIds.next(), 2, groupTwo);
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // WHEN/THEN
    verifyNoMoreInteractions(collector);
    verify(monitor).numberOfCollisions(0);
    assertEquals(0L, mapper.get("10", groupOne));
    assertEquals(1L, mapper.get("9", groupOne));
    assertEquals(2L, mapper.get("10", groupTwo));
}
Aggregations