Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project phoenix by apache.
The class IndexTool, method run.
@Override
public int run(String[] args) throws Exception {
    Connection connection = null;
    HTable htable = null;
    try {
        CommandLine cmdLine = null;
        try {
            cmdLine = parseOptions(args);
        } catch (IllegalStateException e) {
            printHelpAndExit(e.getMessage(), getOptions());
        }
        final Configuration configuration = HBaseConfiguration.addHbaseResources(getConf());
        final String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPTION.getOpt());
        final String dataTable = cmdLine.getOptionValue(DATA_TABLE_OPTION.getOpt());
        final String indexTable = cmdLine.getOptionValue(INDEX_TABLE_OPTION.getOpt());
        final boolean isPartialBuild = cmdLine.hasOption(PARTIAL_REBUILD_OPTION.getOpt());
        final String qDataTable = SchemaUtil.getQualifiedTableName(schemaName, dataTable);
        boolean useDirectApi = cmdLine.hasOption(DIRECT_API_OPTION.getOpt());
        String basePath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt());
        boolean isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
        connection = ConnectionUtil.getInputConnection(configuration);
        byte[][] splitKeysBeforeJob = null;
        boolean isLocalIndexBuild = false;
        PTable pindexTable = null;
        if (indexTable != null) {
            if (!isValidIndexTable(connection, qDataTable, indexTable)) {
                throw new IllegalArgumentException(String.format(" %s is not an index table for %s ", indexTable, qDataTable));
            }
            pindexTable = PhoenixRuntime.getTable(connection, schemaName != null && !schemaName.isEmpty()
                ? SchemaUtil.getQualifiedTableName(schemaName, indexTable) : indexTable);
            htable = (HTable) connection.unwrap(PhoenixConnection.class).getQueryServices()
                .getTable(pindexTable.getPhysicalName().getBytes());
            if (IndexType.LOCAL.equals(pindexTable.getIndexType())) {
                isLocalIndexBuild = true;
                splitKeysBeforeJob = htable.getRegionLocator().getStartKeys();
            }
        }
        PTable pdataTable = PhoenixRuntime.getTableNoCache(connection, qDataTable);
        Path outputPath = null;
        FileSystem fs = null;
        if (basePath != null) {
            outputPath = CsvBulkImportUtil.getOutputPath(new Path(basePath),
                pindexTable == null ? pdataTable.getPhysicalName().getString() : pindexTable.getPhysicalName().getString());
            fs = outputPath.getFileSystem(configuration);
            fs.delete(outputPath, true);
        }
        Job job = new JobFactory(connection, configuration, outputPath).getJob(schemaName, indexTable, dataTable, useDirectApi, isPartialBuild);
        if (!isForeground && useDirectApi) {
            LOG.info("Running Index Build in Background - Submit async and exit");
            job.submit();
            return 0;
        }
LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
        boolean result = job.waitForCompletion(true);
        if (result) {
            if (!useDirectApi && indexTable != null) {
                if (isLocalIndexBuild) {
                    validateSplitForLocalIndex(splitKeysBeforeJob, htable);
                }
                LOG.info("Loading HFiles from {}", outputPath);
                LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
                loader.doBulkLoad(outputPath, htable);
                htable.close();
                // Without direct API, we need to update the index state to ACTIVE from client.
                IndexToolUtil.updateIndexState(connection, qDataTable, indexTable, PIndexState.ACTIVE);
                fs.delete(outputPath, true);
            }
            return 0;
        } else {
LOG.error("IndexTool job failed! Check logs for errors..");
return -1;
}
    } catch (Exception ex) {
        LOG.error("An exception occurred while performing the indexing job: " + ExceptionUtils.getMessage(ex)
            + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    } finally {
        try {
            if (connection != null) {
                connection.close();
            }
            if (htable != null) {
                htable.close();
            }
        } catch (SQLException sqle) {
LOG.error("Failed to close connection ", sqle.getMessage());
throw new RuntimeException("Failed to close connection");
}
}
}
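For reference, a minimal standalone sketch of the same bulk-load step, using the doBulkLoad(Path, Admin, Table, RegionLocator) overload instead of the deprecated HTable variant; the method name, table name, and output path below are illustrative placeholders, not part of IndexTool.
private void bulkLoadIndexHFiles(Configuration conf, Path outputPath, TableName physicalIndexName) throws Exception {
    // org.apache.hadoop.hbase.client.Connection, not the java.sql.Connection used above.
    try (org.apache.hadoop.hbase.client.Connection hbaseConn = ConnectionFactory.createConnection(conf);
            Table table = hbaseConn.getTable(physicalIndexName);
            Admin admin = hbaseConn.getAdmin();
            RegionLocator locator = hbaseConn.getRegionLocator(physicalIndexName)) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Moves the HFiles written by the MapReduce job under outputPath into the index table's regions.
        loader.doBulkLoad(outputPath, admin, table, locator);
    }
}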
Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.
The class TestReplicationSyncUpToolWithBulkLoadedData, method loadAndValidateHFileReplication.
private void loadAndValidateHFileReplication(String testName, byte[] row, byte[] fam, Table source, byte[][][] hfileRanges, int numOfRows) throws Exception {
    Path dir = utility1.getDataTestDirOnTestFS(testName);
    FileSystem fs = utility1.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(fam));
    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
        byte[] from = range[0];
        byte[] to = range[1];
        HFileTestUtil.createHFile(utility1.getConfiguration(), fs, new Path(familyDir, "hfile_" + hfileIdx++),
            fam, row, from, to, numOfRows);
    }
    final TableName tableName = source.getName();
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(utility1.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);
}
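Since LoadIncrementalHFiles implements the Hadoop Tool interface, the last two lines above could also be driven through org.apache.hadoop.util.ToolRunner so the exit code is asserted; a hedged sketch (the test as shown ignores the loader's return value):
int exitCode = ToolRunner.run(utility1.getConfiguration(),
    new LoadIncrementalHFiles(utility1.getConfiguration()),
    new String[] { dir.toString(), tableName.getNameAsString() });
assertEquals("Bulk load of " + dir + " into " + tableName + " failed", 0, exitCode);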
Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.
The class RestoreTool, method createLoader.
/**
* Create a {@link LoadIncrementalHFiles} instance to be used to restore the HFiles of a full
* backup.
* @return the {@link LoadIncrementalHFiles} instance
* @throws IOException exception
*/
private LoadIncrementalHFiles createLoader(Path tableArchivePath, boolean multipleTables) throws IOException {
    // By default the limit is 32, and the loader will fail if the number of files in any region
    // exceeds it. Bad for snapshot restore.
    this.conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
    // Use "true" rather than "yes": Configuration#getBoolean does not treat "yes" as true.
    this.conf.set(LoadIncrementalHFiles.IGNORE_UNMATCHED_CF_CONF_KEY, "true");
    LoadIncrementalHFiles loader = null;
    try {
        loader = new LoadIncrementalHFiles(this.conf);
    } catch (Exception e1) {
        throw new IOException(e1);
    }
    return loader;
}
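A hedged sketch of how a loader created this way might then be applied to one restored table; restoredHFileDir and tableName are illustrative placeholders assumed to be in scope, not fields of RestoreTool.
LoadIncrementalHFiles loader = createLoader(tableArchivePath, false);
// The Tool-style entry point takes the HFile directory and the target table name.
String[] loaderArgs = { restoredHFileDir.toString(), tableName.getNameAsString() };
int ret = loader.run(loaderArgs);
if (ret != 0) {
    throw new IOException("Bulk load of " + restoredHFileDir + " into " + tableName + " failed, return code = " + ret);
}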
Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.
The class MapReduceRestoreJob, method run.
@Override
public void run(Path[] dirPaths, TableName[] tableNames, TableName[] newTableNames, boolean fullBackupRestore) throws IOException {
    String bulkOutputConfKey;
    if (fullBackupRestore) {
        player = new HFileSplitterJob();
        bulkOutputConfKey = HFileSplitterJob.BULK_OUTPUT_CONF_KEY;
    } else {
        player = new WALPlayer();
        bulkOutputConfKey = WALPlayer.BULK_OUTPUT_CONF_KEY;
    }
    // Player reads all files in arbitrary directory structure and creates
    // a Map task for each file
    String dirs = StringUtils.join(dirPaths, ",");
    if (LOG.isDebugEnabled()) {
        LOG.debug("Restore " + (fullBackupRestore ? "full" : "incremental") + " backup from directory " + dirs
            + " from hbase tables " + StringUtils.join(tableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND)
            + " to tables " + StringUtils.join(newTableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND));
    }
    for (int i = 0; i < tableNames.length; i++) {
        LOG.info("Restore " + tableNames[i] + " into " + newTableNames[i]);
        Path bulkOutputPath = getBulkOutputDir(getFileNameCompatibleString(newTableNames[i]));
        Configuration conf = getConf();
        conf.set(bulkOutputConfKey, bulkOutputPath.toString());
        String[] playerArgs = { dirs, tableNames[i].getNameAsString() };
        int result = 0;
        int loaderResult = 0;
        try {
            player.setConf(getConf());
            result = player.run(playerArgs);
            if (succeeded(result)) {
                // do bulk load
                LoadIncrementalHFiles loader = createLoader();
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Restoring HFiles from directory " + bulkOutputPath);
                }
                String[] args = { bulkOutputPath.toString(), newTableNames[i].getNameAsString() };
                loaderResult = loader.run(args);
                if (failed(loaderResult)) {
                    throw new IOException("Can not restore from backup directory " + dirs
                        + " (check Hadoop and HBase logs). Bulk loader return code =" + loaderResult);
                }
            } else {
                throw new IOException("Can not restore from backup directory " + dirs
                    + " (check Hadoop/MR and HBase logs). Player return code =" + result);
            }
            LOG.debug("Restore Job finished:" + result);
        } catch (Exception e) {
            throw new IOException("Can not restore from backup directory " + dirs + " (check Hadoop and HBase logs) ", e);
        }
    }
}
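The helpers succeeded, failed, and createLoader are referenced above but not shown in this excerpt; a hedged sketch of minimal versions, inferred only from how they are called here (the real implementations may differ):
private boolean succeeded(int result) {
    return result == 0;
}

private boolean failed(int result) {
    return result != 0;
}

private LoadIncrementalHFiles createLoader() throws IOException {
    // Raise the per-region/per-family HFile limit so restores with many store files do not fail.
    Configuration conf = new Configuration(getConf());
    conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
    try {
        return new LoadIncrementalHFiles(conf);
    } catch (Exception e) {
        throw new IOException(e);
    }
}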
Use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.
The class PartitionedMobCompactor, method bulkloadRefFile.
/**
* Bulkloads the current file.
*
* @param connection to use to get admin/RegionLocator
* @param table The current table.
* @param bulkloadDirectory The path of bulkload directory.
* @param fileName The current file name.
* @throws IOException if IO failure is encountered
*/
private void bulkloadRefFile(Connection connection, Table table, Path bulkloadDirectory, String fileName) throws IOException {
    // bulkload the ref file
    try {
        LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
        bulkload.doBulkLoad(bulkloadDirectory, connection.getAdmin(), table,
            connection.getRegionLocator(table.getName()));
    } catch (Exception e) {
        throw new IOException(e);
    }
}
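A hedged sketch of how a caller inside the same class might drive bulkloadRefFile; the table name, directory, and file name are illustrative placeholders, and conf is assumed to be the compactor's existing configuration field.
TableName tableName = TableName.valueOf("mob_table");        // assumed target table
Path bulkloadDirectory = new Path("/tmp/mob-compaction-out"); // assumed bulkload output directory
try (Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(tableName)) {
    bulkloadRefFile(connection, table, bulkloadDirectory, "ref-file");
}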