Use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.
The class CsvImporter, method doImport:
@Override
public void doImport() throws IOException {
    if (force) {
        fileSystem.deleteRecursively(databaseLayout.databaseDirectory());
        fileSystem.deleteRecursively(databaseLayout.getTransactionLogsDirectory());
    }

    try (OutputStream badOutput = fileSystem.openAsOutputStream(reportFile, false);
            Collector badCollector = getBadCollector(skipBadEntriesLogging, badOutput)) {
        // Extract the default time zone from the database configuration
        ZoneId dbTimeZone = databaseConfig.get(GraphDatabaseSettings.db_temporal_timezone);
        Supplier<ZoneId> defaultTimeZone = () -> dbTimeZone;

        final var nodeData = nodeData();
        final var relationshipsData = relationshipData();

        CsvInput input = new CsvInput(
                nodeData, defaultFormatNodeFileHeader(defaultTimeZone, normalizeTypes),
                relationshipsData, defaultFormatRelationshipFileHeader(defaultTimeZone, normalizeTypes),
                idType, csvConfig, new CsvInput.PrintingMonitor(stdOut), memoryTracker);

        doImport(input, badCollector);
    }
}
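Here the Collector returned by getBadCollector (not shown above) receives the bad entries encountered during the import and is closed together with the report stream by the try-with-resources block. As a rough illustration of that contract only, the following is a hypothetical stand-in, not the real org.neo4j.internal.batchimport.input.Collector interface (which has additional methods); it assumes just the two callbacks exercised by the tests below plus close(), and writes each report to the same output stream.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

// Hypothetical illustration only; deliberately does NOT implement the real Collector interface.
class ReportWritingBadEntrySink implements AutoCloseable {
    private final OutputStream report;
    private long badEntries;

    ReportWritingBadEntrySink(OutputStream report) {
        this.report = report;
    }

    // Same shape as Collector.collectExtraColumns(String source, long row, String value)
    void collectExtraColumns(String source, long row, String value) throws IOException {
        badEntries++;
        report.write(("Extra column in " + source + " at row " + row + ": " + value + "\n")
                .getBytes(StandardCharsets.UTF_8));
    }

    // Same shape as Collector.collectDuplicateNode(Object id, long actualId, String group)
    void collectDuplicateNode(Object id, long actualId, String group) throws IOException {
        badEntries++;
        report.write(("Duplicate node " + id + " (actual id " + actualId + ") in group " + group + "\n")
                .getBytes(StandardCharsets.UTF_8));
    }

    long badEntries() {
        return badEntries;
    }

    @Override
    public void close() throws IOException {
        report.flush();
    }
}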
Use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.
The class CsvInputTest, method shouldIgnoreEmptyExtraColumns:
@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN
    Iterable<DataFactory> data = datas(CsvInputTest.data(
            ":ID,one\n" +
            "1,test,\n" +
            "2,test,,additional"));

    // WHEN
    Collector collector = mock(Collector.class);
    Input input = new CsvInput(data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(),
            IdType.INTEGER, config(), NO_MONITOR, INSTANCE);

    // THEN
    try (InputIterator nodes = input.nodes(collector).iterator()) {
        assertNextNode(nodes, 1L, properties("one", "test"), labels());
        assertNextNode(nodes, 2L, properties("one", "test"), labels());
        assertFalse(readNext(nodes));
    }
    verify(collector).collectExtraColumns(anyString(), eq(1L), eq(null));
    verify(collector).collectExtraColumns(anyString(), eq(2L), eq(null));
    verify(collector).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
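The header declares only two columns (:ID and one), so the trailing empty cell on row 1 and the two extra cells on row 2 are reported through collectExtraColumns, with empty extra cells reported as a null value. The following standalone sketch (not the neo4j parser; the source name is left as a placeholder) walks the same CSV literal and prints one line per extra cell, matching the three verifications above.

public class ExtraColumnsWalkthrough {
    public static void main(String[] args) {
        String csv = ":ID,one\n1,test,\n2,test,,additional";
        String[] lines = csv.split("\n");
        int declared = lines[0].split(",", -1).length; // 2 declared columns: :ID and one
        for (int row = 1; row < lines.length; row++) {
            String[] cells = lines[row].split(",", -1);
            long id = Long.parseLong(cells[0]);
            for (int col = declared; col < cells.length; col++) {
                String value = cells[col].isEmpty() ? null : cells[col];
                // Prints: (<source>, 1, null), (<source>, 2, null), (<source>, 2, additional)
                System.out.println("collectExtraColumns(<source>, " + id + ", " + value + ")");
            }
        }
    }
}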
Use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldHandleLargeAmountsOfDuplicateNodeIds:
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() {
    // GIVEN
    IdMapper mapper = mapper(new LongEncoder(), Radix.LONG, EncodingIdMapper.NO_MONITOR);
    long nodeId = 0;
    int high = 10;

    // a list of input ids
    List<Object> ids = new ArrayList<>();
    for (int run = 0; run < 2; run++) {
        for (long i = 0; i < high / 2; i++) {
            ids.add(high - (i + 1));
            ids.add(i);
        }
    }

    // fed to the IdMapper
    for (Object inputId : ids) {
        mapper.put(inputId, nodeId++, Group.GLOBAL);
    }

    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(values(ids.toArray()), collector, NONE);

    // THEN
    verify(collector, times(high)).collectDuplicateNode(any(Object.class), anyLong(), anyString());
    assertEquals(high, count(mapper.leftOverDuplicateNodesIds()));
}
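With high = 10, each of the two outer runs emits the ids 9, 0, 8, 1, 7, 2, 6, 3, 5, 4, so every id from 0 to 9 appears exactly twice and the mapper is expected to report exactly high duplicate nodes, one per repeated id. A standalone walkthrough of just the id generation:

import java.util.ArrayList;
import java.util.List;

public class DuplicateIdsWalkthrough {
    public static void main(String[] args) {
        int high = 10;
        List<Long> ids = new ArrayList<>();
        for (int run = 0; run < 2; run++) {
            for (long i = 0; i < high / 2; i++) {
                ids.add(high - (i + 1));
                ids.add(i);
            }
        }
        // [9, 0, 8, 1, 7, 2, 6, 3, 5, 4, 9, 0, 8, 1, 7, 2, 6, 3, 5, 4]
        System.out.println(ids);
    }
}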
Use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups:
@Test
public void shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups() {
    // GIVEN
    final ControlledEncoder encoder = new ControlledEncoder(new LongEncoder());
    final int idsPerGroup = 20;
    int groupCount = 5;
    for (int i = 0; i < groupCount; i++) {
        groups.getOrCreate("Group " + i);
    }
    IdMapper mapper = mapper(encoder, Radix.LONG, EncodingIdMapper.NO_MONITOR, ParallelSort.DEFAULT,
            numberOfCollisions -> new LongCollisionValues(NumberArrayFactories.HEAP, numberOfCollisions, INSTANCE));
    final AtomicReference<Group> group = new AtomicReference<>();
    PropertyValueLookup ids = (nodeId, cursorContext) -> {
        int groupId = toIntExact(nodeId / idsPerGroup);
        if (groupId == groupCount) {
            return null;
        }
        group.set(groups.get(groupId));

        // i.e. the first 10% in each group collide with the first 10% in every other group
        if (nodeId % idsPerGroup < 2) {
            // Let these colliding values encode into the same eId as well,
            // so that they are definitely marked as collisions
            encoder.useThisIdToEncodeNoMatterWhatComesIn(1234567L);
            return nodeId % idsPerGroup;
        }

        // The other 90% will be accidental collisions for something else
        encoder.useThisIdToEncodeNoMatterWhatComesIn((long) (123456 - group.get().id()));
        return nodeId;
    };

    // WHEN
    int count = idsPerGroup * groupCount;
    for (long nodeId = 0; nodeId < count; nodeId++) {
        mapper.put(ids.lookupProperty(nodeId, NULL), nodeId, group.get());
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // THEN
    verifyNoMoreInteractions(collector);
    for (long nodeId = 0; nodeId < count; nodeId++) {
        assertEquals(nodeId, mapper.get(ids.lookupProperty(nodeId, NULL), group.get()));
    }
    verifyNoMoreInteractions(collector);
    assertFalse(mapper.leftOverDuplicateNodesIds().hasNext());
}
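The lookup above forces two kinds of eId collisions: the first two node ids in every group are encoded to the same value (1234567), so they collide deliberately both within and across groups, while the remaining 90% collide "accidentally" on a per-group constant. Because no input id is repeated within a single group, every collision resolves to a distinct node and the Collector must never be called. A standalone walkthrough of that layout (using the loop index as a stand-in for group.get().id(), whose exact numeric value is an implementation detail):

public class CollisionLayoutWalkthrough {
    public static void main(String[] args) {
        int idsPerGroup = 20;
        int groupCount = 5;
        for (long nodeId = 0; nodeId < idsPerGroup * groupCount; nodeId++) {
            long groupId = nodeId / idsPerGroup;
            boolean forcedCollision = nodeId % idsPerGroup < 2;
            long inputId = forcedCollision ? nodeId % idsPerGroup : nodeId;
            long eId = forcedCollision ? 1234567L : 123456L - groupId;
            System.out.printf("node %3d -> group %d, input id %3d, eId %d%n",
                    nodeId, groupId, inputId, eId);
        }
    }
}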
Use of org.neo4j.internal.batchimport.input.Collector in project neo4j by neo4j.
The class EncodingIdMapperTest, method shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups:
@Test
public void shouldBeAbleToHaveDuplicateInputIdButInDifferentGroups() {
    // GIVEN
    EncodingIdMapper.Monitor monitor = mock(EncodingIdMapper.Monitor.class);
    Group firstGroup = groups.getOrCreate("first");
    Group secondGroup = groups.getOrCreate("second");
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, monitor);
    PropertyValueLookup ids = values("10", "9", "10");
    int id = 0;
    // group 0
    mapper.put(ids.lookupProperty(id, NULL), id++, firstGroup);
    mapper.put(ids.lookupProperty(id, NULL), id++, firstGroup);
    // group 1
    mapper.put(ids.lookupProperty(id, NULL), id, secondGroup);
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // WHEN/THEN
    verifyNoMoreInteractions(collector);
    verify(monitor).numberOfCollisions(0);
    assertEquals(0L, mapper.get("10", firstGroup));
    assertEquals(1L, mapper.get("9", firstGroup));
    assertEquals(2L, mapper.get("10", secondGroup));
    assertFalse(mapper.leftOverDuplicateNodesIds().hasNext());
}
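The assertions show that lookups are scoped by group: the same input id "10" maps to node 0 in the first group and to node 2 in the second group, and the Collector is never asked to record a duplicate. Conceptually, and only as an analogy (EncodingIdMapper encodes and sorts ids rather than keeping hash maps), the behaviour corresponds to one map per group, as in this sketch:

import java.util.HashMap;
import java.util.Map;

public class GroupScopedMappingSketch {
    public static void main(String[] args) {
        // Outer key: group name; inner key: input id; value: assigned node id.
        Map<String, Map<String, Long>> mapping = new HashMap<>();
        mapping.computeIfAbsent("first", g -> new HashMap<>()).put("10", 0L);
        mapping.computeIfAbsent("first", g -> new HashMap<>()).put("9", 1L);
        mapping.computeIfAbsent("second", g -> new HashMap<>()).put("10", 2L);

        System.out.println(mapping.get("first").get("10"));  // 0
        System.out.println(mapping.get("first").get("9"));   // 1
        System.out.println(mapping.get("second").get("10")); // 2
    }
}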