Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
From the class EncodingIdMapperTest, method shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups:
@Test
public void shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups() throws Exception {
    // GIVEN
    final ControlledEncoder encoder = new ControlledEncoder(new LongEncoder());
    IdMapper mapper = mapper(encoder, Radix.LONG, NO_MONITOR);
    final int idsPerGroup = 20, groups = 5;
    final AtomicReference<Group> group = new AtomicReference<>();
    InputIterable<Object> ids = SimpleInputIteratorWrapper.wrap("source", new Iterable<Object>() {
        @Override
        public Iterator<Object> iterator() {
            return new PrefetchingIterator<Object>() {
                private int i;

                @Override
                protected Object fetchNextOrNull() {
                    // Change group every <idsPerGroup> ids
                    if (i % idsPerGroup == 0) {
                        int groupId = i / idsPerGroup;
                        if (groupId == groups) {
                            return null;
                        }
                        group.set(new Group.Adapter(groupId, "Group " + groupId));
                    }
                    try {
                        // i.e. the first 10% of ids in each group collide with
                        // the first 10% of ids in every other group
                        if (i % idsPerGroup < 2) {
                            // Let these colliding values encode into the same eId as well,
                            // so that they are definitely marked as collisions
                            encoder.useThisIdToEncodeNoMatterWhatComesIn(Long.valueOf(1234567));
                            return Long.valueOf(i % idsPerGroup);
                        }
                        // The other 90% will be accidental collisions for something else
                        encoder.useThisIdToEncodeNoMatterWhatComesIn(Long.valueOf(123456 - group.get().id()));
                        return Long.valueOf(i);
                    } finally {
                        i++;
                    }
                }
            };
        }
    });

    // WHEN
    long actualId = 0;
    for (Object id : ids) {
        mapper.put(id, actualId++, group.get());
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // THEN
    verifyNoMoreInteractions(collector);
    actualId = 0;
    for (Object id : ids) {
        assertEquals(actualId++, mapper.get(id, group.get()));
    }
}
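To make the collision layout above easier to follow, here is a small standalone sketch (plain Java, no Neo4j types; the class name is made up for illustration) that enumerates the same input the iterator produces: for each index it prints the group, the input id returned, and the eId the controlled encoder is forced to produce. The first two ids of every group share both their input id and the forced eId 1234567, while all remaining ids in a group share the eId 123456 - groupId and so collide only accidentally.

public class CollisionLayout {
    public static void main(String[] args) {
        int idsPerGroup = 20, groups = 5;
        for (int i = 0; i < idsPerGroup * groups; i++) {
            int groupId = i / idsPerGroup;
            boolean deliberate = i % idsPerGroup < 2;
            long inputId = deliberate ? i % idsPerGroup : i;
            long eId = deliberate ? 1234567 : 123456 - groupId;
            System.out.printf("i=%3d group=%d inputId=%3d eId=%d%s%n",
                    i, groupId, inputId, eId, deliberate ? " (deliberate collision)" : "");
        }
    }
}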
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
From the class EncodingIdMapperTest, method shouldHandleLargeAmountsOfDuplicateNodeIds:
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new LongEncoder(), Radix.LONG, NO_MONITOR);
    long nodeId = 0;
    int high = 10;

    // a list of input ids in which every value 0..high-1 occurs exactly twice
    List<Object> ids = new ArrayList<>();
    for (int run = 0; run < 2; run++) {
        for (long i = 0; i < high / 2; i++) {
            ids.add(high - (i + 1));
            ids.add(i);
        }
    }

    // fed to the IdMapper
    for (Object inputId : ids) {
        mapper.put(inputId, nodeId++, GLOBAL);
    }

    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(SimpleInputIteratorWrapper.wrap("source", ids), collector, NONE);

    // THEN
    verify(collector, times(high)).collectDuplicateNode(any(Object.class), anyLong(), anyString(), anyString(), anyString());
}
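The expected times(high) follows directly from how the list is built: each of the two runs of the nested loop adds every value 0..high-1 once, so each value occurs exactly twice and the second occurrence of each is a duplicate. A standalone sketch (plain Java; the class name is illustrative) that rebuilds the same list and counts duplicates:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DuplicateCount {
    public static void main(String[] args) {
        int high = 10;
        List<Long> ids = new ArrayList<>();
        for (int run = 0; run < 2; run++) {
            for (long i = 0; i < high / 2; i++) {
                ids.add(high - (i + 1));
                ids.add(i);
            }
        }
        // Every value 0..high-1 appears exactly twice, so the second
        // occurrence of each value is a duplicate: high duplicates in
        // total, matching verify(collector, times(high)) in the test.
        Map<Long, Integer> seen = new HashMap<>();
        int duplicates = 0;
        for (long id : ids) {
            if (seen.merge(id, 1, Integer::sum) > 1) {
                duplicates++;
            }
        }
        System.out.println(ids);        // [9, 0, 8, 1, 7, 2, 6, 3, 5, 4, 9, 0, ...]
        System.out.println(duplicates); // 10
    }
}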
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
From the class EncodingIdMapperTest, method shouldReportCollisionsForSameInputId:
@Test
public void shouldReportCollisionsForSameInputId() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        for (int i = 0; iterator.hasNext(); i++) {
            mapper.put(iterator.next(), i, GLOBAL);
        }
    }

    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);

    // THEN
    verify(collector, times(1)).collectDuplicateNode("10", 2, GLOBAL.name(), "source:1", "source:3");
    verifyNoMoreInteractions(collector);
}
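The expected arguments "source:1" and "source:3" are the 1-based positions of the two occurrences of "10" within the input source. A minimal sketch (plain Java, illustrative class name) that locates duplicate positions the same way:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DuplicatePositions {
    public static void main(String[] args) {
        List<String> ids = Arrays.asList("10", "9", "10");
        Map<String, Integer> firstSeen = new HashMap<>();
        for (int i = 0; i < ids.size(); i++) {
            String id = ids.get(i);
            // Record 1-based positions, matching the "source:N" coordinates
            Integer first = firstSeen.putIfAbsent(id, i + 1);
            if (first != null) {
                System.out.printf("duplicate %s at source:%d and source:%d%n", id, first, i + 1);
            }
        }
        // prints: duplicate 10 at source:1 and source:3
    }
}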
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
From the class ImportTool, method main:
/**
 * Runs the import tool given the supplied arguments.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
 * test environments, for example lower default buffer sizes.
 */
public static void main(String[] incomingArguments, boolean defaultSettingsSuitableForTests) throws IOException {
    System.err.println("WARNING: neo4j-import is deprecated and support for it will be removed in a future\n" +
            "version of Neo4j; please use neo4j-admin import instead.\n");
    PrintStream out = System.out;
    PrintStream err = System.err;
    Args args = Args.parse(incomingArguments);
    if (ArrayUtil.isEmpty(incomingArguments) || asksForUsage(args)) {
        printUsage(out);
        return;
    }
    File storeDir;
    Collection<Option<File[]>> nodesFiles, relationshipsFiles;
    boolean enableStacktrace;
    Number processors = null;
    Input input = null;
    int badTolerance;
    Charset inputEncoding;
    boolean skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns;
    Config dbConfig;
    OutputStream badOutput = null;
    IdType idType = null;
    int pageSize = UNSPECIFIED;
    Collector badCollector;
    org.neo4j.unsafe.impl.batchimport.Configuration configuration = null;
    File logsDir;
    File badFile;
    boolean success = false;
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction()) {
        storeDir = args.interpretOption(Options.STORE_DIR.key(), Converters.<File>mandatory(), Converters.toFile(),
                Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE);
        Config config = Config.defaults();
        config.augment(stringMap(GraphDatabaseSettings.neo4j_home.name(), storeDir.getAbsolutePath()));
        logsDir = config.get(GraphDatabaseSettings.logs_directory);
        fs.mkdirs(logsDir);
        badFile = new File(storeDir, BAD_FILE_NAME);
        badOutput = new BufferedOutputStream(fs.openAsOutputStream(badFile, false));
        nodesFiles = extractInputFiles(args, Options.NODE_DATA.key(), err);
        relationshipsFiles = extractInputFiles(args, Options.RELATIONSHIP_DATA.key(), err);
        validateInputFiles(nodesFiles, relationshipsFiles);
        enableStacktrace = args.getBoolean(Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE);
        processors = args.getNumber(Options.PROCESSORS.key(), null);
        idType = args.interpretOption(Options.ID_TYPE.key(), withDefault((IdType) Options.ID_TYPE.defaultValue()), TO_ID_TYPE);
        badTolerance = parseNumberOrUnlimited(args, Options.BAD_TOLERANCE);
        inputEncoding = Charset.forName(args.get(Options.INPUT_ENCODING.key(), defaultCharset().name()));
        skipBadRelationships = args.getBoolean(Options.SKIP_BAD_RELATIONSHIPS.key(),
                (Boolean) Options.SKIP_BAD_RELATIONSHIPS.defaultValue(), true);
        skipDuplicateNodes = args.getBoolean(Options.SKIP_DUPLICATE_NODES.key(),
                (Boolean) Options.SKIP_DUPLICATE_NODES.defaultValue(), true);
        ignoreExtraColumns = args.getBoolean(Options.IGNORE_EXTRA_COLUMNS.key(),
                (Boolean) Options.IGNORE_EXTRA_COLUMNS.defaultValue(), true);
        badCollector = badCollector(badOutput, badTolerance, collect(skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns));
        dbConfig = loadDbConfig(args.interpretOption(Options.DATABASE_CONFIG.key(), Converters.<File>optional(),
                Converters.toFile(), Validators.REGEX_FILE_EXISTS));
        configuration = importConfiguration(processors, defaultSettingsSuitableForTests, dbConfig, pageSize);
        input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(),
                relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(),
                idType, csvConfiguration(args, defaultSettingsSuitableForTests), badCollector,
                configuration.maxNumberOfProcessors());
        doImport(out, err, storeDir, logsDir, badFile, fs, nodesFiles, relationshipsFiles, enableStacktrace,
                input, dbConfig, badOutput, configuration);
        success = true;
    } catch (IllegalArgumentException e) {
        throw andPrintError("Input error", e, false, err);
    } catch (IOException e) {
        throw andPrintError("File error", e, false, err);
    } finally {
        if (!success && badOutput != null) {
            badOutput.close();
        }
    }
}
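Note the resource handling here: badOutput is closed in the finally block only when the import did not succeed, because on success doImport (shown next) takes over the stream and closes it in its own finally block. A minimal sketch of this close-on-failure hand-off, assuming a hypothetical doWork() that takes ownership of the stream:

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class CloseOnFailure {
    public static void main(String[] args) throws IOException {
        OutputStream badOutput = null;
        boolean success = false;
        try {
            badOutput = new BufferedOutputStream(new FileOutputStream("bad.log"));
            doWork(badOutput); // on success, ownership (and closing) moves to the callee
            success = true;
        } finally {
            if (!success && badOutput != null) {
                badOutput.close(); // close here only if ownership was never handed over
            }
        }
    }

    // Hypothetical stand-in for doImport: takes ownership of and closes the stream.
    static void doWork(OutputStream out) throws IOException {
        out.close();
    }
}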
Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.
From the class ImportTool, method doImport:
public static void doImport(PrintStream out, PrintStream err, File storeDir, File logsDir, File badFile,
        FileSystemAbstraction fs, Collection<Option<File[]>> nodesFiles, Collection<Option<File[]>> relationshipsFiles,
        boolean enableStacktrace, Input input, Config dbConfig, OutputStream badOutput,
        org.neo4j.unsafe.impl.batchimport.Configuration configuration) throws IOException {
    boolean success;
    LifeSupport life = new LifeSupport();
    LogService logService = life.add(StoreLogService.inLogsDirectory(fs, logsDir));
    life.start();
    // TODO: add file watcher here?
    BatchImporter importer = new ParallelBatchImporter(storeDir, fs, configuration, logService,
            ExecutionMonitors.defaultVisible(), dbConfig);
    printOverview(storeDir, nodesFiles, relationshipsFiles, configuration, out);
    success = false;
    try {
        importer.doImport(input);
        success = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, enableStacktrace, err);
    } finally {
        Collector collector = input.badCollector();
        int numberOfBadEntries = collector.badEntries();
        collector.close();
        badOutput.close();
        if (numberOfBadEntries > 0) {
            out.println("There were bad entries which were skipped and logged into " + badFile.getAbsolutePath());
        }
        life.shutdown();
        if (!success) {
            try {
                StoreFile.fileOperation(FileOperation.DELETE, fs, storeDir, null,
                        Iterables.<StoreFile, StoreFile>iterable(StoreFile.values()), false,
                        ExistingTargetStrategy.FAIL, StoreFileType.values());
            } catch (IOException e) {
                err.println("Unable to delete store files after an aborted import: " + e);
                if (enableStacktrace) {
                    e.printStackTrace();
                }
            }
        }
    }
}
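The finally block above always reports and closes the bad-entry collector, and additionally deletes the partially written store when the import failed, so an aborted run does not leave a half-built database behind. A generic sketch of that abort-cleanup shape, with hypothetical stand-ins for the import, reporting, and delete steps:

import java.io.IOException;

public class AbortCleanup {
    public static void main(String[] args) throws IOException {
        boolean success = false;
        try {
            runImport(); // hypothetical stand-in for importer.doImport(input)
            success = true;
        } finally {
            reportBadEntries(); // always runs, like the bad-collector reporting above
            if (!success) {
                deletePartialStore(); // stand-in for StoreFile.fileOperation(DELETE, ...)
            }
        }
    }

    static void runImport() throws IOException { /* the work that may fail */ }
    static void reportBadEntries() { /* unconditional reporting */ }
    static void deletePartialStore() { /* cleanup of partial output */ }
}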