Use of org.neo4j.csv.reader.CharSeeker in the neo4j project — class InputGroupsDeserializer, method createNestedIterator.
/**
 * Creates the input iterator for the next {@link DataFactory data source}, closing the previous one first.
 * <p>
 * Chooses between two deserialization strategies based on configuration:
 * single-threaded when multi-line fields are possible (row boundaries cannot be
 * found by seeking into an arbitrary file position), parallel otherwise.
 * <p>
 * In both branches {@code currentInput}/{@code currentInputOpen} are set so that
 * {@code close()} on this object can clean up the stream; note the data stream
 * itself is closed by the batch importer when execution is done, not here.
 *
 * @param dataFactory factory producing the next data source to deserialize
 * @return iterator over deserialized entities from that source
 */
@Override
protected InputIterator<ENTITY> createNestedIterator(DataFactory<ENTITY> dataFactory) {
closeCurrent();
// Open the data stream. It's closed by the batch importer when execution is done.
Data<ENTITY> data = dataFactory.create(config);
if (config.multilineFields()) {
// Use a single-threaded reading and parsing because if we can expect multi-line fields it's
// nearly impossible to deduce where one row ends and another starts when diving into
// an arbitrary position in the file.
CharSeeker dataStream = charSeeker(data.stream(), config, true);
// Read the header, given the data stream. This allows the header factory to be able to
// parse the header from the data stream directly. Or it can decide to grab the header
// from somewhere else, it's up to that factory.
Header dataHeader = headerFactory.create(dataStream, config, idType);
InputEntityDeserializer<ENTITY> input = factory.create(dataHeader, dataStream, data.decorator(), validator);
// It's important that we assign currentInput before calling initialize(), so that if something
// goes wrong in initialize() and our close() is called we close it properly.
currentInput = input;
currentInputOpen = true;
input.initialize();
} else {
// If the input fields aren't expected to contain multi-line fields we can do an optimization
// where we have one reader, reading chunks of data, handing over them to one or more parsing
// threads. The reader will read from its current position and N bytes ahead. When it gets there
// it will search backwards for the first new-line character and set the chunk end position
// to that position, effectively un-reading those characters back. This way each chunk will have
// complete rows of data and can be parsed individually by multiple threads.
currentInput = new ParallelInputEntityDeserializer<>(data, headerFactory, config, idType, maxProcessors, previousInputProcessors, factory, validator, entityClass);
currentInputOpen = true;
}
return currentInput;
}
Use of org.neo4j.csv.reader.CharSeeker in the neo4j project — class DataFactoriesTest, method shouldFailOnUnexpectedNodeHeaderType.
/**
 * A node-file header must reject relationship-only column types such as
 * {@code :START_ID}; the thrown {@link InputException} should name the
 * offending type so the user can find it.
 */
@Test
public void shouldFailOnUnexpectedNodeHeaderType() throws Exception {
    // GIVEN a node header illegally containing a :START_ID column
    IdType idType = IdType.ACTUAL;
    // WHEN — try-with-resources closes the seeker on every path, matching the
    // sibling tests in this class (the original leaked it and also bound the
    // returned Header to an unused local).
    try (CharSeeker seeker = seeker(":ID,:START_ID")) {
        DataFactories.defaultFormatNodeFileHeader().create(seeker, COMMAS, idType);
        fail("Should have failed");
    } catch (InputException e) {
        // THEN the message points at the unexpected column type
        assertThat(e.getMessage(), containsString("START_ID"));
    }
}
Use of org.neo4j.csv.reader.CharSeeker in the neo4j project — class DataFactoriesTest, method shouldFailForDuplicatePropertyHeaderEntries.
/**
 * Two property columns sharing the same name ("name") must be rejected, and
 * the {@link DuplicateHeaderException} should expose both conflicting entries.
 */
@Test
public void shouldFailForDuplicatePropertyHeaderEntries() throws Exception {
    // GIVEN a tab-separated header repeating the "name" property
    IdType idType = IdType.ACTUAL;
    Extractors extractors = new Extractors('\t');
    // WHEN — try-with-resources guarantees the seeker is closed even if an
    // unexpected exception escapes (the original only closed on the happy path).
    try (CharSeeker seeker = seeker("one:id\tname\tname:long")) {
        DataFactories.defaultFormatNodeFileHeader().create(seeker, TABS, idType);
        fail("Should fail");
    } catch (DuplicateHeaderException e) {
        // THEN both duplicate entries are reported, in declaration order
        assertEquals(entry("name", Type.PROPERTY, extractors.string()), e.getFirst());
        assertEquals(entry("name", Type.PROPERTY, extractors.long_()), e.getOther());
    }
}
Use of org.neo4j.csv.reader.CharSeeker in the neo4j project — class DataFactoriesTest, method shouldFailForDuplicateIdHeaderEntries.
/**
 * A header may declare at most one :id column; a second one must trigger a
 * {@link DuplicateHeaderException} exposing both conflicting entries.
 */
@Test
public void shouldFailForDuplicateIdHeaderEntries() throws Exception {
    // GIVEN a tab-separated header with two :id columns
    IdType idType = IdType.ACTUAL;
    Extractors extractors = new Extractors('\t');
    // WHEN — try-with-resources guarantees the seeker is closed even if an
    // unexpected exception escapes (the original only closed on the happy path).
    try (CharSeeker seeker = seeker("one:id\ttwo:id")) {
        DataFactories.defaultFormatNodeFileHeader().create(seeker, TABS, idType);
        fail("Should fail");
    } catch (DuplicateHeaderException e) {
        // THEN both id entries are reported, in declaration order
        assertEquals(entry("one", Type.ID, extractors.long_()), e.getFirst());
        assertEquals(entry("two", Type.ID, extractors.long_()), e.getOther());
    }
}
Use of org.neo4j.csv.reader.CharSeeker in the neo4j project — class DataFactoriesTest, method shouldParseHeaderFromFirstLineOfFirstInputFile.
/**
 * The header factory should consume exactly the first line of the first input
 * source and yield one entry per column, with types taken from the suffixes
 * (:ID, :String, :long).
 */
@Test
public void shouldParseHeaderFromFirstLineOfFirstInputFile() throws Exception {
    // GIVEN two sources: the first carries the header line, the second a data row
    final Reader firstSource = new StringReader("id:ID\tname:String\tbirth_date:long");
    final Reader secondSource = new StringReader("0\tThe node\t123456789");
    DataFactory<InputNode> dataFactory = data(value -> value, () -> {
        try {
            return sources(firstSource, secondSource);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
    Header.Factory headerFactory = defaultFormatNodeFileHeader();
    // NOTE(review): Extractors is built with ';' while the data is tab-separated.
    // Assuming the constructor argument is only the array-value delimiter, the
    // entry comparisons below are unaffected — but '\t' would match the sibling
    // tests; confirm before changing.
    Extractors extractors = new Extractors(';');
    // WHEN — try-with-resources closes the seeker even when the assertion fails
    // (the original skipped close() on an assertion error)
    try (CharSeeker seeker = CharSeekers.charSeeker(dataFactory.create(TABS).stream(), TABS, false)) {
        Header header = headerFactory.create(seeker, TABS, IdType.ACTUAL);
        // THEN each column became a typed header entry
        assertArrayEquals(array(
                entry("id", Type.ID, extractors.long_()),
                entry("name", Type.PROPERTY, extractors.string()),
                entry("birth_date", Type.PROPERTY, extractors.long_())), header.entries());
    }
}
Aggregations