Search in sources:

Example 1 with CharReadable

Use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.

From the class CsvInputTest, method shouldNotReportNoRelationshipTypeIfDecorated.

/**
 * Checks that constructing a {@code CsvInput} over a relationship source whose header is only
 * {@code :START_ID,:END_ID}, but whose decorator supplies a default relationship type, never
 * invokes {@code noRelationshipTypeSpecified} on the monitor for that source.
 */
@Test
public void shouldNotReportNoRelationshipTypeIfDecorated() {
    // given
    final String sourceDescription = "source";
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);
    Iterable<DataFactory> relationshipData = datas(cfg -> new Data() {

        @Override
        public RawIterator<CharReadable, IOException> stream() {
            // A new readable is built on every call, so the header can be read more than once
            CharReadable header = wrap(dataWithSourceDescription(":START_ID,:END_ID", sourceDescription), 3);
            return asRawIterator(iterator(header));
        }

        @Override
        public Decorator decorator() {
            return defaultRelationshipType("MyType");
        }
    });

    // when
    new CsvInput(
            datas(),
            defaultFormatNodeFileHeader(),
            relationshipData,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            monitor,
            INSTANCE);

    // then
    verify(monitor, never()).noRelationshipTypeSpecified(sourceDescription);
}
Also used : CharReadable(org.neo4j.csv.reader.CharReadable) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) RawIterator(org.neo4j.collection.RawIterator) Iterators.asRawIterator(org.neo4j.internal.helpers.collection.Iterators.asRawIterator) Test(org.junit.Test)

Example 2 with CharReadable

Use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.

From the class CsvInputTest, method shouldNotReportNoNodeLabelsIfDecorated.

/**
 * Checks that constructing a {@code CsvInput} over a node source whose header is only
 * {@code :ID}, but whose decorator adds the label {@code MyLabel}, never reports missing node
 * labels to the monitor for that source.
 */
@Test
public void shouldNotReportNoNodeLabelsIfDecorated() {
    // given
    String sourceDescription = "source";
    Supplier<CharReadable> headerSource = () -> wrap(dataWithSourceDescription(":ID", sourceDescription), 3);
    Iterable<DataFactory> data = datas(config -> new Data() {

        @Override
        public RawIterator<CharReadable, IOException> stream() {
            return asRawIterator(iterator(headerSource.get()));
        }

        @Override
        public Decorator decorator() {
            return additiveLabels(new String[] { "MyLabel" });
        }
    });
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);
    // when
    new CsvInput(data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, COMMAS, monitor, INSTANCE);
    // then
    // This test supplies node data only, so the node-label callback is the one that must stay silent.
    // Verifying the relationship-type callback here would pass vacuously and could never fail.
    verify(monitor, never()).noNodeLabelsSpecified(sourceDescription);
}
Also used : CharReadable(org.neo4j.csv.reader.CharReadable) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) RawIterator(org.neo4j.collection.RawIterator) Iterators.asRawIterator(org.neo4j.internal.helpers.collection.Iterators.asRawIterator) Test(org.junit.Test)

Example 3 with CharReadable

Use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.

From the class CsvInputTest, method shouldReportNoRelationshipType.

/**
 * Checks that constructing a {@code CsvInput} over a relationship source whose header is only
 * {@code :START_ID,:END_ID}, and which uses {@code NO_DECORATOR}, invokes
 * {@code noRelationshipTypeSpecified} on the monitor with that source's description.
 */
@Test
public void shouldReportNoRelationshipType() {
    // given
    final String sourceDescription = "source";
    CsvInput.Monitor monitor = mock(CsvInput.Monitor.class);
    Iterable<DataFactory> relationshipData = datas(cfg -> new Data() {

        @Override
        public RawIterator<CharReadable, IOException> stream() {
            // A new readable is built on every call, so the header can be read more than once
            CharReadable header = wrap(dataWithSourceDescription(":START_ID,:END_ID", sourceDescription), 3);
            return asRawIterator(iterator(header));
        }

        @Override
        public Decorator decorator() {
            return NO_DECORATOR;
        }
    });

    // when
    new CsvInput(
            datas(),
            defaultFormatNodeFileHeader(),
            relationshipData,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            monitor,
            INSTANCE);

    // then
    verify(monitor).noRelationshipTypeSpecified(sourceDescription);
}
Also used : CharReadable(org.neo4j.csv.reader.CharReadable) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) RawIterator(org.neo4j.collection.RawIterator) Iterators.asRawIterator(org.neo4j.internal.helpers.collection.Iterators.asRawIterator) Test(org.junit.Test)

Example 4 with CharReadable

Use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.

From the class CsvInput, method sample.

/**
 * Produces size estimates for a set of input groups by reading the beginning of every source
 * and extrapolating from the sampled entities to the whole source.
 *
 * @param dataFactories one factory per input group; each group may stream several sources
 * @param headerFactory used to extract the header from the first source of each group
 * @param valueSizeCalculator passed on to {@code Inputs.calculatePropertySize} for every sampled entity
 * @param additionalCalculator extra per-entity figure, accumulated into the fourth estimate
 * @return {@code [entity count, property count, property size, labels (for nodes only)]}
 * @throws IOException on failure to read from a source
 */
private long[] sample(Iterable<DataFactory> dataFactories, Header.Factory headerFactory, PropertySizeCalculator valueSizeCalculator, ToIntFunction<InputEntity> additionalCalculator) throws IOException {
    long[] estimates = new long[4];
    try (CsvInputChunkProxy chunk = new CsvInputChunkProxy()) {
        int groupId = 0;
        // Each data factory represents one group of input files
        for (DataFactory dataFactory : dataFactories) {
            groupId++;
            Data data = dataFactory.create(config);
            Header header = null;
            RawIterator<CharReadable, IOException> sources = data.stream();
            while (sources.hasNext()) {
                try (CharReadable source = sources.next()) {
                    // The header comes from the first file in the group. This is also the only place
                    // where type normalization is monitored: it happens before import and touches all headers.
                    if (header == null) {
                        header = extractHeader(source, headerFactory, idType, config, groups, monitor);
                    }
                    try (CsvInputIterator iterator = new CsvInputIterator(source, data.decorator(), header, config, idType, EMPTY, CsvGroupInputIterator.extractors(config), groupId);
                        InputEntity entity = new InputEntity()) {
                        int sampledEntities = 0;
                        int sampledProperties = 0;
                        int sampledPropertySize = 0;
                        int sampledAdditional = 0;
                        // Keep pulling chunks until the sample window is exhausted or the source ends
                        while (iterator.position() < ESTIMATE_SAMPLE_SIZE && iterator.next(chunk)) {
                            while (chunk.next(entity)) {
                                sampledProperties += entity.propertyCount();
                                sampledPropertySize += Inputs.calculatePropertySize(entity, valueSizeCalculator, NULL, memoryTracker);
                                sampledAdditional += additionalCalculator.applyAsInt(entity);
                                sampledEntities++;
                            }
                        }
                        if (sampledEntities > 0) {
                            // Scale the sample up by the ratio between the source's full size and the sampled position
                            long sampledPosition = iterator.position();
                            double ratio = iterator.compressionRatio();
                            double uncompressedSize = source.length() / ratio;
                            long totalEntities = (long) ((uncompressedSize / sampledPosition) * sampledEntities);
                            estimates[0] += totalEntities;
                            estimates[1] += ((double) sampledProperties / sampledEntities) * totalEntities;
                            estimates[2] += ((double) sampledPropertySize / sampledEntities) * totalEntities;
                            estimates[3] += ((double) sampledAdditional / sampledEntities) * totalEntities;
                        }
                    }
                }
            }
        }
    }
    return estimates;
}
Also used : IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) CsvInputIterator.extractHeader(org.neo4j.internal.batchimport.input.csv.CsvInputIterator.extractHeader) CharReadable(org.neo4j.csv.reader.CharReadable) InputEntity(org.neo4j.internal.batchimport.input.InputEntity)

Example 5 with CharReadable

Use of org.neo4j.csv.reader.CharReadable in project neo4j by neo4j.

From the class CsvInputTest, method shouldCloseDataIteratorsInTheEnd.

/**
 * Checks that the node and relationship sources given to a {@code CsvInput} are closed after
 * the corresponding input iterators have each been advanced once and then closed.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN
    CharReadable nodeSource = charReader("1");
    CharReadable relationshipSource = charReader("1,1");
    IdType idType = IdType.STRING;
    Iterable<DataFactory<InputNode>> nodeFactories = dataIterable(given(nodeSource));
    Iterable<DataFactory<InputRelationship>> relationshipFactories =
            dataIterable(data(relationshipSource, defaultRelationshipType("TYPE")));
    Input input = new CsvInput(
            nodeFactories,
            header(entry(null, Type.ID, idType.extractor(extractors))),
            relationshipFactories,
            header(
                    entry(null, Type.START_ID, idType.extractor(extractors)),
                    entry(null, Type.END_ID, idType.extractor(extractors))),
            idType,
            config(COMMAS),
            silentBadCollector(0),
            getRuntime().availableProcessors());

    // WHEN
    try (ResourceIterator<InputNode> nodes = input.nodes().iterator()) {
        nodes.next();
    }
    try (ResourceIterator<InputRelationship> relationships = input.relationships().iterator()) {
        relationships.next();
    }

    // THEN
    assertClosed(nodeSource);
    assertClosed(relationshipSource);
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) CharReadable(org.neo4j.csv.reader.CharReadable) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Test(org.junit.Test)

Aggregations

CharReadable (org.neo4j.csv.reader.CharReadable)10 Test (org.junit.Test)8 Matchers.containsString (org.hamcrest.Matchers.containsString)7 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)7 RawIterator (org.neo4j.collection.RawIterator)7 Iterators.asRawIterator (org.neo4j.internal.helpers.collection.Iterators.asRawIterator)7 IOException (java.io.IOException)1 UncheckedIOException (java.io.UncheckedIOException)1 Test (org.junit.jupiter.api.Test)1 CharSeeker (org.neo4j.csv.reader.CharSeeker)1 Extractors (org.neo4j.csv.reader.Extractors)1 MultiReadable (org.neo4j.csv.reader.MultiReadable)1 InputEntity (org.neo4j.internal.batchimport.input.InputEntity)1 CsvInputIterator.extractHeader (org.neo4j.internal.batchimport.input.csv.CsvInputIterator.extractHeader)1 DataFactories.defaultFormatNodeFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)1 DataFactories.defaultFormatRelationshipFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader)1 Input (org.neo4j.unsafe.impl.batchimport.input.Input)1 InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)1 InputRelationship (org.neo4j.unsafe.impl.batchimport.input.InputRelationship)1