Examples with BadCollector - org.neo4j.unsafe.impl.batchimport.input.BadCollector

Example 1 with BadCollector

Use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.

In class CsvImporter, method doImport:

/**
 * Runs a CSV import using the files and options held by this importer.
 *
 * <p>Opens the report file for bad entries, wraps it in a bad-entry {@code Collector}, builds the
 * {@code CsvInput} and delegates the actual work to {@code ImportTool.doImport}. If any of these
 * steps fails, the report stream opened here is closed before the failure propagates, so the
 * file handle is not leaked (same guard as {@code ImportTool.main}).
 *
 * @throws IOException propagated from opening the report file or from the import itself.
 */
@Override
public void doImport() throws IOException {
    FileSystemAbstraction fs = outsideWorld.fileSystem();
    File storeDir = config.get(DatabaseManagementSystemSettings.database_path);
    File logsDir = config.get(GraphDatabaseSettings.logs_directory);
    File reportFile = new File(reportFileName);
    OutputStream badOutput = new BufferedOutputStream(fs.openAsOutputStream(reportFile, false));
    boolean success = false;
    try {
        // Unlimited tolerance as soon as any "ignore" option is on; otherwise no bad entries are tolerated.
        Collector badCollector = badCollector(badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0, collect(ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns));
        Configuration configuration = importConfiguration(null, false, config);
        CsvInput input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(), relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(), idType, csvConfiguration(args, false), badCollector, configuration.maxNumberOfProcessors());
        ImportTool.doImport(outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs, nodesFiles, relationshipsFiles, false, input, config, badOutput, configuration);
        success = true;
    } finally {
        if (!success) {
            try {
                badOutput.close();
            } catch (IOException ignored) {
                // The import has already failed; a failure to close the report stream
                // must not replace the original, more informative exception.
            }
        }
    }
}

Also used : FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) ImportTool.importConfiguration(org.neo4j.tooling.ImportTool.importConfiguration) Configuration(org.neo4j.unsafe.impl.batchimport.Configuration) ImportTool.csvConfiguration(org.neo4j.tooling.ImportTool.csvConfiguration) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Example 2 with BadCollector

Use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.

In class ImportPanicIT, method shouldExitAndThrowExceptionOnPanic:

/**
 * There was this problem where some steps, and in particular parallel CSV input parsing, that
 * panicked would hang the import entirely. This test feeds the importer a node file with broken
 * entries and expects the import to fail with an exception instead.
 */
@Test
public void shouldExitAndThrowExceptionOnPanic() throws Exception {
    // GIVEN
    BatchImporter batchImporter = new ParallelBatchImporter(
            directory.absolutePath(),
            fs,
            Configuration.DEFAULT,
            NullLogService.getInstance(),
            ExecutionMonitors.invisible(),
            AdditionalInitialIds.EMPTY,
            Config.empty(),
            StandardV3_0.RECORD_FORMATS);
    Iterable<DataFactory<InputNode>> brokenNodes =
            nodeData(data(NO_NODE_DECORATOR, fileAsCharReadable(nodeCsvFileWithBrokenEntries())));
    Input input = new CsvInput(
            brokenNodes,
            defaultFormatNodeFileHeader(),
            relationshipData(),
            defaultFormatRelationshipFileHeader(),
            IdType.ACTUAL,
            csvConfigurationWithLowBufferSize(),
            new BadCollector(new NullOutputStream(), 0, 0),
            Runtime.getRuntime().availableProcessors());

    // WHEN
    InputException failure = null;
    try {
        batchImporter.doImport(input);
    } catch (InputException e) {
        failure = e;
    }

    // THEN
    if (failure == null) {
        fail("Should have failed properly");
    }
    assertTrue(failure.getCause() instanceof DataAfterQuoteException);
    // and we managed to shut down properly
}

Also used : CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) Input(org.neo4j.unsafe.impl.batchimport.input.Input) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) InputException(org.neo4j.unsafe.impl.batchimport.input.InputException) DataFactory(org.neo4j.unsafe.impl.batchimport.input.csv.DataFactory) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) DataAfterQuoteException(org.neo4j.csv.reader.DataAfterQuoteException) Test(org.junit.Test)

Example 3 with BadCollector

Use of org.neo4j.unsafe.impl.batchimport.input.BadCollector in project neo4j by neo4j.

In class ImportTool, method main:

/**
 * Runs the import tool given the supplied arguments.
 *
 * <p>A deprecation warning is always printed to {@code System.err} first. When no arguments are
 * given, or the arguments ask for usage, the usage text is printed to {@code System.out} and the
 * method returns without importing anything. Otherwise the options are parsed, the bad-entries
 * output file is opened inside the store directory, and the import is started. If the import does
 * not complete successfully, the bad-entries output stream is closed here.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
 * test environments, for example lower default buffer sizes.
 * @throws IOException declared by this method; I/O failures raised while setting up or running
 * the import are first passed through {@code andPrintError} with the label "File error".
 */
public static void main(String[] incomingArguments, boolean defaultSettingsSuitableForTests) throws IOException {
    System.err.println("WARNING: neo4j-import is deprecated and support for it will be removed in a future\n" + "version of Neo4j; please use neo4j-admin import instead.\n");
    PrintStream out = System.out;
    PrintStream err = System.err;
    Args args = Args.parse(incomingArguments);
    if (ArrayUtil.isEmpty(incomingArguments) || asksForUsage(args)) {
        printUsage(out);
        return;
    }

    // Only these two outlive the try block: the finally clause needs them for clean-up.
    OutputStream badOutput = null;
    boolean success = false;
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction()) {
        // Store directory, and the logs directory derived from it.
        File storeDir = args.interpretOption(Options.STORE_DIR.key(), Converters.<File>mandatory(), Converters.toFile(), Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE);
        Config homeConfig = Config.defaults();
        homeConfig.augment(stringMap(GraphDatabaseSettings.neo4j_home.name(), storeDir.getAbsolutePath()));
        File logsDir = homeConfig.get(GraphDatabaseSettings.logs_directory);
        fs.mkdirs(logsDir);

        // Output file for bad entries, placed inside the store directory.
        File badFile = new File(storeDir, BAD_FILE_NAME);
        badOutput = new BufferedOutputStream(fs.openAsOutputStream(badFile, false));

        // Input files.
        Collection<Option<File[]>> nodesFiles = extractInputFiles(args, Options.NODE_DATA.key(), err);
        Collection<Option<File[]>> relationshipsFiles = extractInputFiles(args, Options.RELATIONSHIP_DATA.key(), err);
        validateInputFiles(nodesFiles, relationshipsFiles);

        // Remaining command line options.
        boolean enableStacktrace = args.getBoolean(Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE);
        Number processors = args.getNumber(Options.PROCESSORS.key(), null);
        IdType idType = args.interpretOption(Options.ID_TYPE.key(), withDefault((IdType) Options.ID_TYPE.defaultValue()), TO_ID_TYPE);
        int badTolerance = parseNumberOrUnlimited(args, Options.BAD_TOLERANCE);
        Charset inputEncoding = Charset.forName(args.get(Options.INPUT_ENCODING.key(), defaultCharset().name()));
        boolean skipBadRelationships = args.getBoolean(Options.SKIP_BAD_RELATIONSHIPS.key(), (Boolean) Options.SKIP_BAD_RELATIONSHIPS.defaultValue(), true);
        boolean skipDuplicateNodes = args.getBoolean(Options.SKIP_DUPLICATE_NODES.key(), (Boolean) Options.SKIP_DUPLICATE_NODES.defaultValue(), true);
        boolean ignoreExtraColumns = args.getBoolean(Options.IGNORE_EXTRA_COLUMNS.key(), (Boolean) Options.IGNORE_EXTRA_COLUMNS.defaultValue(), true);

        // Collaborators for the import itself.
        Collector badCollector = badCollector(badOutput, badTolerance, collect(skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns));
        Config dbConfig = loadDbConfig(args.interpretOption(Options.DATABASE_CONFIG.key(), Converters.<File>optional(), Converters.toFile(), Validators.REGEX_FILE_EXISTS));
        int pageSize = UNSPECIFIED;
        org.neo4j.unsafe.impl.batchimport.Configuration configuration = importConfiguration(processors, defaultSettingsSuitableForTests, dbConfig, pageSize);
        Input input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(), relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(), idType, csvConfiguration(args, defaultSettingsSuitableForTests), badCollector, configuration.maxNumberOfProcessors());

        doImport(out, err, storeDir, logsDir, badFile, fs, nodesFiles, relationshipsFiles, enableStacktrace, input, dbConfig, badOutput, configuration);
        success = true;
    } catch (IllegalArgumentException inputError) {
        throw andPrintError("Input error", inputError, false, err);
    } catch (IOException fileError) {
        throw andPrintError("File error", fileError, false, err);
    } finally {
        // Close the bad-entries stream here only when the import did not finish successfully.
        if (badOutput != null && !success) {
            badOutput.close();
        }
    }
}

Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) Config(org.neo4j.kernel.configuration.Config) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) Input(org.neo4j.unsafe.impl.batchimport.input.Input) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) BufferedOutputStream(java.io.BufferedOutputStream) PrintStream(java.io.PrintStream) Args(org.neo4j.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) Charset.defaultCharset(java.nio.charset.Charset.defaultCharset) Charset(java.nio.charset.Charset) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) IOException(java.io.IOException) IdType(org.neo4j.unsafe.impl.batchimport.input.csv.IdType) Option(org.neo4j.helpers.Args.Option) StoreFile(org.neo4j.kernel.impl.storemigration.StoreFile) File(java.io.File)

Aggregations

BadCollector (org.neo4j.unsafe.impl.batchimport.input.BadCollector)3 CsvInput (org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput)3 BufferedOutputStream (java.io.BufferedOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)2 Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)2 Collectors.badCollector (org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector)2 Input (org.neo4j.unsafe.impl.batchimport.input.Input)2 IOException (java.io.IOException)1 PrintStream (java.io.PrintStream)1 Charset (java.nio.charset.Charset)1 Charset.defaultCharset (java.nio.charset.Charset.defaultCharset)1 Test (org.junit.Test)1 DataAfterQuoteException (org.neo4j.csv.reader.DataAfterQuoteException)1 Args (org.neo4j.helpers.Args)1 Option (org.neo4j.helpers.Args.Option)1 DefaultFileSystemAbstraction (org.neo4j.io.fs.DefaultFileSystemAbstraction)1 Config (org.neo4j.kernel.configuration.Config)1 StoreFile (org.neo4j.kernel.impl.storemigration.StoreFile)1