use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.
the class CsvInputTest method shouldNotReportNoRelationshipTypeIfDecorated.
/**
 * Relationship data whose header only declares {@code :START_ID,:END_ID} but whose decorator is
 * {@code defaultRelationshipType("MyType")} must not cause
 * {@code CsvInput.Monitor#noRelationshipTypeSpecified} to be called for that source.
 */
@Test
public void shouldNotReportNoRelationshipTypeIfDecorated() {
    // given: a relationship source without a type column, decorated with a default type
    String sourceDescription = "source";
    Supplier<CharReadable> relationshipSource =
            () -> wrap(dataWithSourceDescription(":START_ID,:END_ID", sourceDescription), 3);
    Iterable<DataFactory> relationshipData = datas(config -> new Data() {
        @Override
        public Decorator decorator() {
            return defaultRelationshipType("MyType");
        }

        @Override
        public RawIterator<CharReadable, IOException> stream() {
            // A fresh readable is produced on every call
            return asRawIterator(iterator(relationshipSource.get()));
        }
    });
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);

    // when: the input is constructed with no node data and the decorated relationship data
    new CsvInput(
            datas(),
            defaultFormatNodeFileHeader(),
            relationshipData,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            monitor,
            INSTANCE);

    // then: nothing was reported for this source
    verify(monitor, never()).noRelationshipTypeSpecified(sourceDescription);
}
use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.
the class CsvInputTest method shouldNotReportNoNodeLabelsIfDecorated.
/**
 * Node data whose header only declares {@code :ID} but whose decorator is
 * {@code additiveLabels("MyLabel")} must not cause the monitor to be told that the source has
 * no node labels.
 *
 * <p>The verification targets {@code noNodeLabelsSpecified}; checking
 * {@code noRelationshipTypeSpecified} here would pass trivially because this test supplies no
 * relationship data at all.
 */
@Test
public void shouldNotReportNoNodeLabelsIfDecorated() {
    // given: a node source without a label column, decorated with an additive label
    String sourceDescription = "source";
    Supplier<CharReadable> headerSource = () -> wrap(dataWithSourceDescription(":ID", sourceDescription), 3);
    Iterable<DataFactory> data = datas(config -> new Data() {
        @Override
        public RawIterator<CharReadable, IOException> stream() {
            return asRawIterator(iterator(headerSource.get()));
        }

        @Override
        public Decorator decorator() {
            return additiveLabels(new String[] { "MyLabel" });
        }
    });
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);

    // when: the input is constructed with the decorated node data and no relationship data
    new CsvInput(data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, COMMAS, monitor, INSTANCE);

    // then: the node-label warning (not the relationship-type one) must never fire for this source
    verify(monitor, never()).noNodeLabelsSpecified(sourceDescription);
}
use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.
the class CsvInputTest method shouldReportNoRelationshipType.
/**
 * Relationship data whose header only declares {@code :START_ID,:END_ID} and whose decorator is
 * {@code NO_DECORATOR} must cause {@code CsvInput.Monitor#noRelationshipTypeSpecified} to be
 * called with the source's description.
 */
@Test
public void shouldReportNoRelationshipType() {
    // given: a relationship source without a type column and without any decorator
    String sourceDescription = "source";
    Supplier<CharReadable> relationshipSource =
            () -> wrap(dataWithSourceDescription(":START_ID,:END_ID", sourceDescription), 3);
    Iterable<DataFactory> relationshipData = datas(config -> new Data() {
        @Override
        public Decorator decorator() {
            return NO_DECORATOR;
        }

        @Override
        public RawIterator<CharReadable, IOException> stream() {
            // A fresh readable is produced on every call
            return asRawIterator(iterator(relationshipSource.get()));
        }
    });
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);

    // when: the input is constructed with no node data and the undecorated relationship data
    new CsvInput(
            datas(),
            defaultFormatNodeFileHeader(),
            relationshipData,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            monitor,
            INSTANCE);

    // then: the missing type was reported for this source
    verify(monitor).noRelationshipTypeSpecified(sourceDescription);
}
use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.
the class CsvInput method sample.
/**
 * Reads the beginning of every source produced by the given data factories and extrapolates
 * totals from what was read.
 *
 * <p>Each data factory is treated as one group; the header is extracted from the first source
 * of the group and reused for the remaining sources in it. A source is read until the iterator
 * position reaches {@code ESTIMATE_SAMPLE_SIZE} or the source is exhausted. If at least one
 * entity was read, the per-entity averages are scaled by
 * {@code (source.length() / compressionRatio) / position}.
 *
 * @param dataFactories groups of input sources to sample
 * @param headerFactory used to extract the header from the first source of each group
 * @param valueSizeCalculator passed to {@code Inputs.calculatePropertySize} for each entity
 * @param additionalCalculator extra per-entity number accumulated into the fourth slot
 * @return {@code [entity count, property count, property size, labels (for nodes only)]}
 * @throws IOException propagated from reading or closing the sources
 */
private long[] sample(Iterable<DataFactory> dataFactories, Header.Factory headerFactory, PropertySizeCalculator valueSizeCalculator, ToIntFunction<InputEntity> additionalCalculator) throws IOException {
    // [0] entity count, [1] property count, [2] property size, [3] labels (for nodes only)
    long[] result = new long[4];
    try (CsvInputChunkProxy chunk = new CsvInputChunkProxy()) {
        int groupId = 0;
        // Every data factory represents one group of input files
        for (DataFactory dataFactory : dataFactories) {
            groupId++;
            Data data = dataFactory.create(config);
            RawIterator<CharReadable, IOException> sources = data.stream();
            Header header = null;
            while (sources.hasNext()) {
                try (CharReadable source = sources.next()) {
                    if (header == null) {
                        // The first file of the group carries the header. This is also the only place where
                        // type normalization is monitored, since it runs before import and touches all headers.
                        header = extractHeader(source, headerFactory, idType, config, groups, monitor);
                    }
                    try (CsvInputIterator iterator = new CsvInputIterator(
                                    source, data.decorator(), header, config, idType, EMPTY,
                                    CsvGroupInputIterator.extractors(config), groupId);
                            InputEntity entity = new InputEntity()) {
                        int sampledEntities = 0;
                        int sampledProperties = 0;
                        int sampledPropertySize = 0;
                        int sampledAdditional = 0;
                        while (iterator.position() < ESTIMATE_SAMPLE_SIZE && iterator.next(chunk)) {
                            while (chunk.next(entity)) {
                                sampledProperties += entity.propertyCount();
                                sampledPropertySize += Inputs.calculatePropertySize(entity, valueSizeCalculator, NULL, memoryTracker);
                                sampledAdditional += additionalCalculator.applyAsInt(entity);
                                sampledEntities++;
                            }
                        }
                        if (sampledEntities > 0) {
                            long sampledPosition = iterator.position();
                            double ratio = iterator.compressionRatio();
                            double adjustedLength = source.length() / ratio;
                            long estimatedEntities = (long) ((adjustedLength / sampledPosition) * sampledEntities);
                            result[0] += estimatedEntities;
                            // Compound assignment narrows the double sum back to long, as intended
                            result[1] += ((double) sampledProperties / sampledEntities) * estimatedEntities;
                            result[2] += ((double) sampledPropertySize / sampledEntities) * estimatedEntities;
                            result[3] += ((double) sampledAdditional / sampledEntities) * estimatedEntities;
                        }
                    }
                }
            }
        }
    }
    return result;
}
use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.
the class CsvInputTest method shouldCloseDataIteratorsInTheEnd.
/**
 * Opens the node and relationship iterators of a {@code CsvInput}, advances each once, closes
 * them, and then asserts that both underlying readables have been closed.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN one node source and one relationship source
    CharReadable nodeSource = charReader("1");
    CharReadable relationshipSource = charReader("1,1");
    IdType idType = IdType.STRING;
    Iterable<DataFactory<InputNode>> nodeFactories = dataIterable(given(nodeSource));
    Iterable<DataFactory<InputRelationship>> relationshipFactories =
            dataIterable(data(relationshipSource, defaultRelationshipType("TYPE")));
    Input input = new CsvInput(
            nodeFactories,
            header(entry(null, Type.ID, idType.extractor(extractors))),
            relationshipFactories,
            header(
                    entry(null, Type.START_ID, idType.extractor(extractors)),
                    entry(null, Type.END_ID, idType.extractor(extractors))),
            idType,
            config(COMMAS),
            silentBadCollector(0),
            getRuntime().availableProcessors());

    // WHEN each iterator is advanced once and then closed
    try (ResourceIterator<InputNode> nodes = input.nodes().iterator()) {
        nodes.next();
    }
    try (ResourceIterator<InputRelationship> relationships = input.relationships().iterator()) {
        relationships.next();
    }

    // THEN both underlying sources are closed
    assertClosed(nodeSource);
    assertClosed(relationshipSource);
}
Aggregations