Search in sources :

Example 31 with Record

use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.

Example from class LogManager, method appendUnmappableStatementToDebugFile.

// Mapping errors (failed record -> statement or row -> record mappings).
// This variant handles the load workflow: a record that could not be mapped
// to a bound statement. The failing statement's details are appended to the
// mapping-errors debug file, then the statement is re-emitted downstream.
@SuppressWarnings("BlockingMethodInNonBlockingContext")
private Mono<UnmappableStatement> appendUnmappableStatementToDebugFile(UnmappableStatement statement) {
    try {
        // The writer for this log file was opened eagerly; it must already be cached.
        PrintWriter out = openFiles.get(operationDirectory.resolve(MAPPING_ERRORS_FILE));
        assert out != null;
        Record failedRecord = statement.getRecord();
        out.println("Resource: " + failedRecord.getResource());
        out.println("Position: " + failedRecord.getPosition());
        // The source may be absent (e.g. connector did not retain it); skip the line then.
        if (failedRecord.getSource() != null) {
            out.println("Source: " + LogManagerUtils.formatSource(failedRecord));
        }
        stackTracePrinter.printStackTrace(statement.getError(), out);
        out.println();
        out.flush();
        return Mono.just(statement);
    } catch (Exception ex) {
        // Surface any I/O failure through the reactive chain instead of throwing.
        return Mono.error(ex);
    }
}
Also used : Path(java.nio.file.Path) Record(com.datastax.oss.dsbulk.connectors.api.Record) ErrorRecord(com.datastax.oss.dsbulk.connectors.api.ErrorRecord) InvalidMappingException(com.datastax.oss.dsbulk.workflow.commons.schema.InvalidMappingException) BusyConnectionException(com.datastax.oss.driver.api.core.connection.BusyConnectionException) QueryExecutionException(com.datastax.oss.driver.api.core.servererrors.QueryExecutionException) TooManyErrorsException(com.datastax.oss.dsbulk.workflow.api.error.TooManyErrorsException) AllNodesFailedException(com.datastax.oss.driver.api.core.AllNodesFailedException) RequestThrottlingException(com.datastax.oss.driver.api.core.RequestThrottlingException) InvalidQueryException(com.datastax.oss.driver.api.core.servererrors.InvalidQueryException) FrameTooLongException(com.datastax.oss.driver.api.core.connection.FrameTooLongException) DriverTimeoutException(com.datastax.oss.driver.api.core.DriverTimeoutException) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Example 32 with Record

use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.

Example from class UnloadWorkflow, method oneWriter.

/**
 * Builds the unload pipeline variant where all read results are funneled into a
 * single writer. Reads are parallelized on a dedicated scheduler sized from the
 * core count, then mapped to records; writing happens on the calling sequence.
 */
private Flux<Record> oneWriter() {
    // Cap parallelism at twice the core count, never exceeding the read concurrency.
    int numThreads = Math.min(numCores * 2, readConcurrency);
    Scheduler scheduler =
        numThreads == 1
            ? Schedulers.immediate()
            : Schedulers.newParallel(numThreads, new DefaultThreadFactory("workflow"));
    // Track the scheduler so it can be disposed when the workflow closes.
    schedulers.add(scheduler);
    // Per-statement read pipeline: monitor, filter failures, then map rows to records.
    Function<Publisher<ReadResult>, Flux<Record>> readPipeline =
        upstream ->
            Flux.from(upstream)
                .publishOn(scheduler, 500)
                .transform(queryWarningsHandler)
                .transform(totalItemsMonitor)
                .transform(totalItemsCounter)
                .transform(failedReadResultsMonitor)
                .transform(failedReadsHandler)
                .map(readResultMapper::map)
                .transform(failedRecordsMonitor)
                .transform(unmappableRecordsHandler);
    return Flux.fromIterable(readStatements)
        .flatMap(results -> readPipeline.apply(executor.readReactive(results)), readConcurrency, 500)
        .transform(writer)
        .transform(failedRecordsMonitor)
        .transform(failedRecordsHandler);
}
Also used : DefaultThreadFactory(io.netty.util.concurrent.DefaultThreadFactory) ReadResult(com.datastax.oss.dsbulk.executor.api.result.ReadResult) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) DefaultThreadFactory(io.netty.util.concurrent.DefaultThreadFactory) BulkReader(com.datastax.oss.dsbulk.executor.api.reader.BulkReader) DriverSettings(com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Workflow(com.datastax.oss.dsbulk.workflow.api.Workflow) Scheduler(reactor.core.scheduler.Scheduler) Function(java.util.function.Function) ExecutorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings) SchemaSettings(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings) HashSet(java.util.HashSet) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) CqlSession(com.datastax.oss.driver.api.core.CqlSession) ConnectorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ConnectorSettings) Duration(java.time.Duration) SchemaGenerationStrategy(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaGenerationStrategy) Schedulers(reactor.core.scheduler.Schedulers) Record(com.datastax.oss.dsbulk.connectors.api.Record) Stopwatch(com.datastax.oss.driver.shaded.guava.common.base.Stopwatch) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) Logger(org.slf4j.Logger) Config(com.typesafe.config.Config) LogSettings(com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings) Publisher(org.reactivestreams.Publisher) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) SettingsManager(com.datastax.oss.dsbulk.workflow.commons.settings.SettingsManager) EngineSettings(com.datastax.oss.dsbulk.workflow.commons.settings.EngineSettings) Set(java.util.Set) ClusterInformationUtils(com.datastax.oss.dsbulk.workflow.commons.utils.ClusterInformationUtils) 
CodecSettings(com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings) MonitoringSettings(com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings) TimeUnit(java.util.concurrent.TimeUnit) Flux(reactor.core.publisher.Flux) List(java.util.List) CloseableUtils(com.datastax.oss.dsbulk.workflow.commons.utils.CloseableUtils) ReadResultMapper(com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper) DurationUtils(com.datastax.oss.dsbulk.workflow.api.utils.DurationUtils) MetricsManager(com.datastax.oss.dsbulk.workflow.commons.metrics.MetricsManager) Statement(com.datastax.oss.driver.api.core.cql.Statement) LogManager(com.datastax.oss.dsbulk.workflow.commons.log.LogManager) Scheduler(reactor.core.scheduler.Scheduler)

Example 33 with Record

use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.

Example from class LogManagerTest, method should_handle_failed_records_without_source.

@Test
void should_handle_failed_records_without_source() throws Exception {
    Path tmpDir = Files.createTempDirectory("test");
    LogManager manager = new LogManager(session, tmpDir, ErrorThreshold.forAbsoluteValue(1), ErrorThreshold.forAbsoluteValue(0), true, statementFormatter, EXTENDED, rowFormatter);
    manager.init();
    // A failed record with a null source: the "Source:" line must be omitted from the log.
    Record failed = new DefaultErrorRecord(null, resource1, 1, new RuntimeException("error 1"));
    Flux.just(failed).transform(manager.newFailedRecordsHandler()).blockLast();
    manager.close();
    Path errorLog = manager.getOperationDirectory().resolve("connector-errors.log");
    Path positionsFile = manager.getOperationDirectory().resolve("positions.txt");
    assertThat(errorLog.toFile()).exists();
    assertThat(positionsFile.toFile()).exists();
    assertThat(FileUtils.listAllFilesInDirectory(manager.getOperationDirectory())).containsOnly(errorLog, positionsFile);
    String errorContent = String.join("\n", Files.readAllLines(errorLog, UTF_8));
    assertThat(errorContent).doesNotContain("Source: ").contains("Resource: " + resource1).contains("java.lang.RuntimeException: error 1");
    // The position file must still record how far the failed resource was consumed.
    assertThat(Files.readAllLines(positionsFile, UTF_8)).containsOnly("file:///file1.csv:1");
}
Also used : Path(java.nio.file.Path) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) Test(org.junit.jupiter.api.Test)

Example 34 with Record

use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.

Example from class LogManagerTest, method should_handle_unmappable_records_without_source.

@Test
void should_handle_unmappable_records_without_source() throws Exception {
    Path tmpDir = Files.createTempDirectory("test");
    LogManager manager = new LogManager(session, tmpDir, ErrorThreshold.forAbsoluteValue(1), ErrorThreshold.forAbsoluteValue(0), true, statementFormatter, EXTENDED, rowFormatter);
    manager.init();
    // An unmappable record with a null source (unload workflow): no Source/Resource/Position
    // lines should appear in the mapping-errors log, only the stack trace.
    Record unmappable = new DefaultErrorRecord(null, tableResource, 1, new RuntimeException("error 1"));
    Flux.just(unmappable).transform(manager.newUnmappableRecordsHandler()).blockLast();
    manager.close();
    Path errorLog = manager.getOperationDirectory().resolve("mapping-errors.log");
    assertThat(errorLog.toFile()).exists();
    assertThat(FileUtils.listAllFilesInDirectory(manager.getOperationDirectory())).containsOnly(errorLog);
    String errorContent = String.join("\n", Files.readAllLines(errorLog, UTF_8));
    assertThat(errorContent).doesNotContain("Source: ").doesNotContain("Resource: ").doesNotContain("Position: ").contains("java.lang.RuntimeException: error 1");
}
Also used : Path(java.nio.file.Path) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) Test(org.junit.jupiter.api.Test)

Example 35 with Record

use of com.datastax.oss.dsbulk.connectors.api.Record in project dsbulk by datastax.

Example from class LogManagerTest, method should_print_raw_bytes_when_column_cannot_be_properly_deserialized.

@Test
void should_print_raw_bytes_when_column_cannot_be_properly_deserialized() throws Exception {
    Path tmpDir = Files.createTempDirectory("test");
    LogManager manager = new LogManager(session, tmpDir, ErrorThreshold.forAbsoluteValue(2), ErrorThreshold.forAbsoluteValue(0), true, statementFormatter, EXTENDED, rowFormatter);
    // Emulate bad row with corrupted data, see DefaultReadResultMapper:
    // getObject fails, so the row formatter must fall back to the raw bytes.
    IllegalArgumentException root = new IllegalArgumentException("Invalid 32-bits integer value, expecting 4 bytes but got 5");
    IllegalArgumentException deserError = new IllegalArgumentException("Could not deserialize column c1 of type int as java.lang.Integer", root);
    when(row1.getObject(0)).thenThrow(root);
    when(row1.getBytesUnsafe(0)).thenReturn(ByteBuffer.wrap(new byte[] { 1, 2, 3, 4, 5 }));
    rowRecord1 = new DefaultErrorRecord(successfulReadResult1, tableResource, 1, deserError);
    manager.init();
    Flux.just(rowRecord1).transform(manager.newUnmappableRecordsHandler()).blockLast();
    manager.close();
    Path errorLog = manager.getOperationDirectory().resolve("mapping-errors.log");
    assertThat(errorLog.toFile()).exists();
    assertThat(FileUtils.listAllFilesInDirectory(manager.getOperationDirectory())).containsOnly(errorLog);
    String errorContent = String.join("\n", Files.readAllLines(errorLog, UTF_8));
    // Raw bytes are hex-dumped alongside the malformed-buffer note; both exception
    // messages (wrapper and cause) must be present in the log.
    assertThat(errorContent).doesNotContain("Resource: ").doesNotContain("Position: ").contains("SELECT 1").contains("c1: 0x0102030405 (malformed buffer for type INT)").contains(deserError.getMessage()).contains(root.getMessage());
}
Also used : Path(java.nio.file.Path) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultErrorRecord(com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord) DefaultRecord(com.datastax.oss.dsbulk.connectors.api.DefaultRecord) Record(com.datastax.oss.dsbulk.connectors.api.Record) Test(org.junit.jupiter.api.Test)

Aggregations

Record (com.datastax.oss.dsbulk.connectors.api.Record)54 DefaultRecord (com.datastax.oss.dsbulk.connectors.api.DefaultRecord)40 Config (com.typesafe.config.Config)39 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)37 Test (org.junit.jupiter.api.Test)35 ErrorRecord (com.datastax.oss.dsbulk.connectors.api.ErrorRecord)24 Path (java.nio.file.Path)24 DefaultIndexedField (com.datastax.oss.dsbulk.connectors.api.DefaultIndexedField)10 Function (java.util.function.Function)9 MethodSource (org.junit.jupiter.params.provider.MethodSource)9 DefaultMappedField (com.datastax.oss.dsbulk.connectors.api.DefaultMappedField)8 ArrayList (java.util.ArrayList)8 List (java.util.List)8 Publisher (org.reactivestreams.Publisher)8 DefaultErrorRecord (com.datastax.oss.dsbulk.connectors.api.DefaultErrorRecord)7 IOException (java.io.IOException)7 ValueSource (org.junit.jupiter.params.provider.ValueSource)7 Flux (reactor.core.publisher.Flux)7 DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader)6 ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus)6