use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.
the class CsvImporter method doImport.
/**
 * Runs a CSV import of the configured node and relationship files into the store located at
 * {@code DatabaseManagementSystemSettings.database_path}.
 *
 * Bad entries are reported to the file named by {@code reportFileName}. The collector tolerates an
 * unlimited number of bad entries when {@code isIgnoringSomething()} is true, otherwise none.
 *
 * @throws IOException propagated from opening the report file or from the import itself.
 */
@Override
public void doImport() throws IOException {
    FileSystemAbstraction fs = outsideWorld.fileSystem();
    File storeDir = config.get(DatabaseManagementSystemSettings.database_path);
    File logsDir = config.get(GraphDatabaseSettings.logs_directory);
    File reportFile = new File(reportFileName);
    OutputStream badOutput = new BufferedOutputStream(fs.openAsOutputStream(reportFile, false));
    boolean success = false;
    try {
        Collector badCollector = badCollector(badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0, collect(ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns));
        // null processors: no explicit processor count is passed to the import configuration.
        Configuration configuration = importConfiguration(null, false, config);
        CsvInput input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(), relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(), idType, csvConfiguration(args, false), badCollector, configuration.maxNumberOfProcessors());
        // The error stream is intentionally passed for both the regular and the error output.
        ImportTool.doImport(outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs, nodesFiles, relationshipsFiles, false, input, config, badOutput, configuration);
        success = true;
    } finally {
        // Same convention as ImportTool.main: on failure the report stream is closed here so that
        // it does not leak when setup fails before the import has started.
        // NOTE(review): ImportTool.doImport presumably closes badOutput on its own once it runs;
        // confirm that a second close is harmless for the file system in use.
        if (!success) {
            badOutput.close();
        }
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.
the class ImportPanicIT method shouldExitAndThrowExceptionOnPanic.
/**
 * Regression test: a step that panicked, in particular the parallel CSV input parsing,
 * used to make the whole import hang. The import must instead terminate and surface
 * the failure as an exception.
 */
@Test
public void shouldExitAndThrowExceptionOnPanic() throws Exception {
    // GIVEN an importer and a CSV input whose node file contains broken entries
    BatchImporter batchImporter = new ParallelBatchImporter(
            directory.absolutePath(),
            fs,
            Configuration.DEFAULT,
            NullLogService.getInstance(),
            ExecutionMonitors.invisible(),
            AdditionalInitialIds.EMPTY,
            Config.empty(),
            StandardV3_0.RECORD_FORMATS);
    Iterable<DataFactory<InputNode>> brokenNodes =
            nodeData(data(NO_NODE_DECORATOR, fileAsCharReadable(nodeCsvFileWithBrokenEntries())));
    Input csvInput = new CsvInput(
            brokenNodes,
            defaultFormatNodeFileHeader(),
            relationshipData(),
            defaultFormatRelationshipFileHeader(),
            IdType.ACTUAL,
            csvConfigurationWithLowBufferSize(),
            new BadCollector(new NullOutputStream(), 0, 0),
            Runtime.getRuntime().availableProcessors());

    // WHEN running the import, remembering the failure if there is one
    InputException failure = null;
    try {
        batchImporter.doImport(csvInput);
    } catch (InputException e) {
        failure = e;
    }

    // THEN the import failed (i.e. it shut down properly instead of hanging) ...
    if (failure == null) {
        fail("Should have failed properly");
    }
    // ... and the failure was caused by the broken quoting in the input
    assertTrue(failure.getCause() instanceof DataAfterQuoteException);
}
use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.
the class ImportTool method main.
/**
 * Entry point of the (deprecated) import tool. Prints a deprecation warning, parses the supplied
 * arguments, builds a CSV input out of them and hands everything over to {@code doImport}.
 * Usage is printed, and nothing is imported, if no arguments are given or usage is asked for.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
 * test environments, for example lower default buffer sizes.
 * @throws IOException declared by the signature; note that {@link IOException}s raised inside
 * the import section are caught, printed and rethrown via {@code andPrintError}.
 */
public static void main(String[] incomingArguments, boolean defaultSettingsSuitableForTests) throws IOException {
    System.err.println("WARNING: neo4j-import is deprecated and support for it will be removed in a future\n" + "version of Neo4j; please use neo4j-admin import instead.\n");
    PrintStream out = System.out;
    PrintStream err = System.err;
    Args args = Args.parse(incomingArguments);
    if (ArrayUtil.isEmpty(incomingArguments) || asksForUsage(args)) {
        printUsage(out);
        return;
    }

    // These two outlive the try block since the finally clause needs them.
    OutputStream badOutput = null;
    boolean success = false;
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction()) {
        // Store and logs locations
        File storeDir = args.interpretOption(
                Options.STORE_DIR.key(),
                Converters.<File>mandatory(),
                Converters.toFile(),
                Validators.DIRECTORY_IS_WRITABLE,
                Validators.CONTAINS_NO_EXISTING_DATABASE);
        Config homeConfig = Config.defaults();
        homeConfig.augment(stringMap(GraphDatabaseSettings.neo4j_home.name(), storeDir.getAbsolutePath()));
        File logsDir = homeConfig.get(GraphDatabaseSettings.logs_directory);
        fs.mkdirs(logsDir);

        // Report of bad entries, placed inside the store directory
        File badFile = new File(storeDir, BAD_FILE_NAME);
        badOutput = new BufferedOutputStream(fs.openAsOutputStream(badFile, false));

        // Input files
        Collection<Option<File[]>> nodesFiles = extractInputFiles(args, Options.NODE_DATA.key(), err);
        Collection<Option<File[]>> relationshipsFiles = extractInputFiles(args, Options.RELATIONSHIP_DATA.key(), err);
        validateInputFiles(nodesFiles, relationshipsFiles);

        // Remaining options
        boolean enableStacktrace = args.getBoolean(Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE);
        Number processors = args.getNumber(Options.PROCESSORS.key(), null);
        IdType idType = args.interpretOption(
                Options.ID_TYPE.key(),
                withDefault((IdType) Options.ID_TYPE.defaultValue()),
                TO_ID_TYPE);
        int badTolerance = parseNumberOrUnlimited(args, Options.BAD_TOLERANCE);
        Charset inputEncoding = Charset.forName(args.get(Options.INPUT_ENCODING.key(), defaultCharset().name()));
        boolean skipBadRelationships = args.getBoolean(
                Options.SKIP_BAD_RELATIONSHIPS.key(),
                (Boolean) Options.SKIP_BAD_RELATIONSHIPS.defaultValue(),
                true);
        boolean skipDuplicateNodes = args.getBoolean(
                Options.SKIP_DUPLICATE_NODES.key(),
                (Boolean) Options.SKIP_DUPLICATE_NODES.defaultValue(),
                true);
        boolean ignoreExtraColumns = args.getBoolean(
                Options.IGNORE_EXTRA_COLUMNS.key(),
                (Boolean) Options.IGNORE_EXTRA_COLUMNS.defaultValue(),
                true);

        Collector badCollector = badCollector(
                badOutput,
                badTolerance,
                collect(skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns));
        Config dbConfig = loadDbConfig(args.interpretOption(
                Options.DATABASE_CONFIG.key(),
                Converters.<File>optional(),
                Converters.toFile(),
                Validators.REGEX_FILE_EXISTS));

        // The page size is always left unspecified here.
        int pageSize = UNSPECIFIED;
        org.neo4j.unsafe.impl.batchimport.Configuration configuration =
                importConfiguration(processors, defaultSettingsSuitableForTests, dbConfig, pageSize);

        Input input = new CsvInput(
                nodeData(inputEncoding, nodesFiles),
                defaultFormatNodeFileHeader(),
                relationshipData(inputEncoding, relationshipsFiles),
                defaultFormatRelationshipFileHeader(),
                idType,
                csvConfiguration(args, defaultSettingsSuitableForTests),
                badCollector,
                configuration.maxNumberOfProcessors());

        doImport(out, err, storeDir, logsDir, badFile, fs, nodesFiles, relationshipsFiles,
                enableStacktrace, input, dbConfig, badOutput, configuration);
        success = true;
    } catch (IllegalArgumentException e) {
        throw andPrintError("Input error", e, false, err);
    } catch (IOException e) {
        throw andPrintError("File error", e, false, err);
    } finally {
        // Only close the report stream here when something went wrong after it was opened.
        if (badOutput != null && !success) {
            badOutput.close();
        }
    }
}
Aggregations