Search in sources :

Example 1 with Extractors

use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

the class DataFactoriesTest method shouldParseDefaultNodeFileHeaderCorrectly.

/**
 * Verifies that a comma-separated node header is turned into the expected header entries:
 * an ID column, two label columns (the type name is given once in lower case and once in
 * upper case), an untyped column and a {@code long} column.
 *
 * @throws Exception if creating the header or closing the seeker fails.
 */
@Test
public void shouldParseDefaultNodeFileHeaderCorrectly() throws Exception {
    // GIVEN
    IdType idType = IdType.STRING;
    Extractors extractors = new Extractors(',');
    // try-with-resources: the seeker is closed even when the assertion below fails
    try (CharSeeker seeker = seeker("ID:ID,label-one:label,also-labels:LABEL,name,age:long")) {
        // WHEN
        Header header = DataFactories.defaultFormatNodeFileHeader().create(seeker, COMMAS, idType);
        // THEN
        assertArrayEquals(
                array(
                        entry("ID", Type.ID, idType.extractor(extractors)),
                        entry("label-one", Type.LABEL, extractors.stringArray()),
                        entry("also-labels", Type.LABEL, extractors.stringArray()),
                        entry("name", Type.PROPERTY, extractors.string()),
                        entry("age", Type.PROPERTY, extractors.long_())),
                header.entries());
    }
}
Also used : Extractors(org.neo4j.csv.reader.Extractors) CharSeeker(org.neo4j.csv.reader.CharSeeker) DataFactories.defaultFormatNodeFileHeader(org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader) Test(org.junit.Test)

Example 2 with Extractors

use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

the class DataFactoriesTest method shouldAllowMissingIdHeaderEntry.

/**
 * Verifies that a tab-separated node header without any ID column is accepted and that
 * both untyped columns come out as string properties.
 *
 * @throws Exception if creating the header or closing the seeker fails.
 */
@Test
public void shouldAllowMissingIdHeaderEntry() throws Exception {
    // GIVEN
    Extractors extractors = new Extractors(';');
    // try-with-resources: the seeker is closed even when the assertion below fails
    try (CharSeeker seeker = seeker("one\ttwo")) {
        // WHEN
        Header header = DataFactories.defaultFormatNodeFileHeader().create(seeker, TABS, IdType.ACTUAL);
        // THEN
        assertArrayEquals(
                array(
                        entry("one", Type.PROPERTY, extractors.string()),
                        entry("two", Type.PROPERTY, extractors.string())),
                header.entries());
    }
}
Also used : Extractors(org.neo4j.csv.reader.Extractors) CharSeeker(org.neo4j.csv.reader.CharSeeker) DataFactories.defaultFormatNodeFileHeader(org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader) Test(org.junit.Test)

Example 3 with Extractors

use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

the class DataFactoriesTest method shouldHaveEmptyHeadersBeInterpretedAsIgnored.

/**
 * Verifies that an empty column name in a tab-separated node header (two consecutive tabs)
 * produces an entry of type {@code IGNORE} with neither a name nor an extractor, while the
 * surrounding columns are parsed as usual.
 *
 * @throws Exception if creating the header or closing the seeker fails.
 */
@Test
public void shouldHaveEmptyHeadersBeInterpretedAsIgnored() throws Exception {
    // GIVEN
    IdType idType = IdType.ACTUAL;
    Extractors extractors = new Extractors('\t');
    // try-with-resources: the seeker is closed even when the assertion below fails
    try (CharSeeker seeker = seeker("one:id\ttwo\t\tdate:long")) {
        // WHEN
        Header header = DataFactories.defaultFormatNodeFileHeader().create(seeker, TABS, idType);
        // THEN
        assertArrayEquals(
                array(
                        entry("one", Type.ID, extractors.long_()),
                        entry("two", Type.PROPERTY, extractors.string()),
                        // the empty third column
                        entry(null, Type.IGNORE, null),
                        entry("date", Type.PROPERTY, extractors.long_())),
                header.entries());
    }
}
Also used : Extractors(org.neo4j.csv.reader.Extractors) CharSeeker(org.neo4j.csv.reader.CharSeeker) DataFactories.defaultFormatNodeFileHeader(org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader) Test(org.junit.Test)

Example 4 with Extractors

use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

the class InputEntityDeserializer method deserializeNextFromSource.

/**
 * Reads one line worth of fields from {@code data}, one field per header entry, and passes
 * every non-ignored value to {@code deserialization}.
 *
 * @return {@code true} if fields were read for an entity; {@code false} if the header has no
 * entries or if no further value could be found before the first field was read.
 * @throws IOException declared for the underlying read calls.
 * @throws InputException for any {@link RuntimeException} raised while reading; the message
 * describes the data source, field, header and (when it can be extracted) the raw field value.
 */
private boolean deserializeNextFromSource() throws IOException {
    Header.Entry[] entries = header.entries();
    if (entries.length == 0) {
        return false;
    }
    // Declared outside the try block so that the catch block can report the failing field.
    int fieldIndex = 0;
    try {
        while (fieldIndex < entries.length) {
            // Move on to the next value
            if (!data.seek(mark, delimiter)) {
                if (fieldIndex == 0) {
                    // Nothing was read for this entity, so this is simply the end
                    return false;
                }
                throw new UnexpectedEndOfInputException("Near " + mark);
            }

            // Extract the value using the type given by the header
            Header.Entry current = entries[fieldIndex];
            if (entry(current)) {
                Object extracted = null;
                if (data.tryExtract(mark, current.extractor())) {
                    extracted = current.extractor().value();
                }
                deserialization.handle(current, extracted);
            }

            if (mark.isEndOfLine()) {
                // End of line reached: stop here and return an entity with what we have.
                break;
            }
            fieldIndex++;
        }
        return true;
    } catch (final RuntimeException failure) {
        // Best effort: try to read the offending field as a plain string for the error message.
        String rawValue = null;
        try {
            Extractors fallback = new Extractors('?');
            if (data.tryExtract(mark, fallback.string())) {
                rawValue = fallback.string().value();
            }
        } catch (Exception ignored) {
            // OK, the raw value is reported as unknown below
        }

        String fieldDescription = entries[fieldIndex] + ":" + (fieldIndex + 1);
        String message = format(
                "ERROR in input"
                        + "%n  data source: %s"
                        + "%n  in field: %s"
                        + "%n  for header: %s"
                        + "%n  raw field value: %s"
                        + "%n  original error: %s",
                data, fieldDescription, header, rawValue != null ? rawValue : "??", failure.getMessage());
        if (!(failure instanceof InputException)) {
            throw new InputException(message, failure);
        }
        throw Exceptions.withMessage(failure, message);
    }
}

/** Tells whether the value of the given header entry should be extracted, i.e. it is not ignored. */
private static boolean entry(Header.Entry candidate) {
    return candidate.type() != Type.IGNORE;
}
Also used : Extractors(org.neo4j.csv.reader.Extractors) UnexpectedEndOfInputException(org.neo4j.unsafe.impl.batchimport.input.UnexpectedEndOfInputException) InputException(org.neo4j.unsafe.impl.batchimport.input.InputException) IOException(java.io.IOException)

Example 5 with Extractors

use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

the class DataFactoriesTest method shouldParseGroupName.

/**
 * Verifies that group names given in parentheses after {@code :START_ID} and {@code :END_ID}
 * in a tab-separated relationship header end up on the corresponding header entries, next to
 * the type column, a {@code long} column and a {@code long[]} column.
 *
 * @throws Exception if creating the header or closing the seeker fails.
 */
@Test
public void shouldParseGroupName() throws Exception {
    // GIVEN
    IdType idType = IdType.ACTUAL;
    Extractors extractors = new Extractors('\t');
    // try-with-resources: the seeker is closed even when the assertion below fails
    try (CharSeeker seeker = seeker(":START_ID(GroupOne)\t:END_ID(GroupTwo)\ttype:TYPE\tdate:long\tmore:long[]")) {
        // WHEN
        Header header = DataFactories.defaultFormatRelationshipFileHeader().create(seeker, TABS, idType);
        // THEN
        assertArrayEquals(
                array(
                        entry(null, Type.START_ID, "GroupOne", idType.extractor(extractors)),
                        entry(null, Type.END_ID, "GroupTwo", idType.extractor(extractors)),
                        entry("type", Type.TYPE, extractors.string()),
                        entry("date", Type.PROPERTY, extractors.long_()),
                        entry("more", Type.PROPERTY, extractors.longArray())),
                header.entries());
    }
}
Also used : Extractors(org.neo4j.csv.reader.Extractors) CharSeeker(org.neo4j.csv.reader.CharSeeker) DataFactories.defaultFormatRelationshipFileHeader(org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader) Test(org.junit.Test)

Aggregations

Extractors (org.neo4j.csv.reader.Extractors)22 CharSeeker (org.neo4j.csv.reader.CharSeeker)18 Test (org.junit.jupiter.api.Test)11 IdType (org.neo4j.internal.batchimport.input.IdType)10 Test (org.junit.Test)8 DataFactories.defaultFormatNodeFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)8 DataFactories.defaultFormatRelationshipFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader)8 DataFactories.defaultFormatNodeFileHeader (org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)6 IOException (java.io.IOException)3 ParallelBatchImporter (org.neo4j.internal.batchimport.ParallelBatchImporter)2 DataGeneratorInput (org.neo4j.internal.batchimport.input.DataGeneratorInput)2 Input (org.neo4j.internal.batchimport.input.Input)2 JobScheduler (org.neo4j.scheduler.JobScheduler)2 DuplicateHeaderException (org.neo4j.unsafe.impl.batchimport.input.DuplicateHeaderException)2 Reader (java.io.Reader)1 StringReader (java.io.StringReader)1 Path (java.nio.file.Path)1 Config (org.neo4j.configuration.Config)1 CharReadable (org.neo4j.csv.reader.CharReadable)1 Configuration (org.neo4j.csv.reader.Configuration)1