
Example 1 with LoadIncrementalHFiles

use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.

the class MapReduceRestoreJob method createLoader.

private LoadIncrementalHFiles createLoader() throws IOException {
    // set configuration for restore:
    // LoadIncrementalHFiles needs a larger hbase.rpc.timeout
    // (e.g. <name>hbase.rpc.timeout</name> <value>600000</value>); here it is raised to one hour
    Integer milliSecInHour = 3600000;
    Configuration conf = new Configuration(getConf());
    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, milliSecInHour);
    // By default the limit is 32, and the loader will fail if the number of files in any
    // region exceeds it. That is bad for snapshot restore.
    conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
    conf.set(LoadIncrementalHFiles.IGNORE_UNMATCHED_CF_CONF_KEY, "yes");
    LoadIncrementalHFiles loader = null;
    try {
        loader = new LoadIncrementalHFiles(conf);
    } catch (Exception e) {
        throw new IOException(e);
    }
    return loader;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) IOException(java.io.IOException)
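
The loader returned by createLoader is typically driven through its Tool-style run method with an HFile directory and the target table name, as the bulk-load loop in Example 2 below does. The following is a minimal sketch of that hand-off; the HFile directory and table name are hypothetical placeholders, not values from the MapReduceRestoreJob source.

// Minimal usage sketch, assuming the createLoader() method above and an existing target table.
// The HFile directory and table name are hypothetical placeholders.
private void bulkLoadRestoredHFiles() throws Exception {
    LoadIncrementalHFiles loader = createLoader();
    // LoadIncrementalHFiles implements Tool: run() bulk-loads every HFile found
    // under the given directory into the named table.
    String[] args = { "/backup/restore/hfiles", "restored_table" };
    int exitCode = loader.run(args);
    if (exitCode != 0) {
        throw new IOException("Bulk load failed with exit code " + exitCode);
    }
}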

Example 2 with LoadIncrementalHFiles

use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.

the class RestoreTool method restoreTableAndCreate.

private void restoreTableAndCreate(Connection conn, TableName tableName, TableName newTableName, Path tableBackupPath, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
    if (newTableName == null) {
        newTableName = tableName;
    }
    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);
    // get table descriptor first
    HTableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
    if (tableDescriptor != null) {
        LOG.debug("Retrieved descriptor: " + tableDescriptor + " through " + lastIncrBackupId);
    }
    if (tableDescriptor == null) {
        Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
        if (fileSys.exists(tableSnapshotPath)) {
            // check whether snapshot dir already recorded for target table
            if (snapshotMap.get(tableName) != null) {
                SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
                SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
                tableDescriptor = manifest.getTableDescriptor();
            } else {
                tableDescriptor = getTableDesc(tableName);
                snapshotMap.put(tableName, getTableInfoPath(tableName));
            }
            if (tableDescriptor == null) {
                LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
            }
        } else {
            throw new IOException("Table snapshot directory: " + tableSnapshotPath + " does not exist.");
        }
    }
    Path tableArchivePath = getTableArchivePath(tableName);
    if (tableArchivePath == null) {
        if (tableDescriptor != null) {
            // a table descriptor with no archive dir means the table is empty; only create the table
            if (LOG.isDebugEnabled()) {
                LOG.debug("Found table descriptor but no archive dir for table " + tableName + ", will only create table");
            }
            tableDescriptor.setName(newTableName);
            checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, null, tableDescriptor, truncateIfExists);
            return;
        } else {
            throw new IllegalStateException("Cannot restore hbase table because tableArchivePath is null.");
        }
    }
    if (tableDescriptor == null) {
        tableDescriptor = new HTableDescriptor(newTableName);
    } else {
        tableDescriptor.setName(newTableName);
    }
    // load all files in dir
    try {
        ArrayList<Path> regionPathList = getRegionList(tableName);
        // we should only try to create the table with all region information, so that we can
        // pre-split the regions at a fine grain
        checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, regionPathList, tableDescriptor, truncateIfExists);
        if (tableArchivePath != null) {
            // start real restore through bulkload
            // if the backup target is on local cluster, special action needed
            Path tempTableArchivePath = checkLocalAndBackup(tableArchivePath);
            if (tempTableArchivePath.equals(tableArchivePath)) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("TableArchivePath for bulkload using existPath: " + tableArchivePath);
                }
            } else {
                // point to the tempDir
                regionPathList = getRegionList(tempTableArchivePath);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("TableArchivePath for bulkload using tempPath: " + tempTableArchivePath);
                }
            }
            LoadIncrementalHFiles loader = createLoader(tempTableArchivePath, false);
            for (Path regionPath : regionPathList) {
                String regionName = regionPath.toString();
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Restoring HFiles from directory " + regionName);
                }
                String[] args = { regionName, newTableName.getNameAsString() };
                loader.run(args);
            }
        }
    // we do not restore recovered edits
    } catch (Exception e) {
        throw new IllegalStateException("Cannot restore hbase table", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SnapshotManifest(org.apache.hadoop.hbase.snapshot.SnapshotManifest) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) FileSystem(org.apache.hadoop.fs.FileSystem) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem) HBackupFileSystem(org.apache.hadoop.hbase.backup.HBackupFileSystem) SnapshotDescription(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor)

Example 3 with LoadIncrementalHFiles

use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project phoenix by apache.

the class AbstractBulkLoadTool method completebulkload.

private void completebulkload(Configuration conf, Path outputPath, List<TargetTableRef> tablesToBeLoaded) throws Exception {
    Set<String> tableNames = new HashSet<>(tablesToBeLoaded.size());
    for (TargetTableRef table : tablesToBeLoaded) {
        if (tableNames.contains(table.getPhysicalName())) {
            continue;
        }
        tableNames.add(table.getPhysicalName());
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        String tableName = table.getPhysicalName();
        Path tableOutputPath = CsvBulkImportUtil.getOutputPath(outputPath, tableName);
        HTable htable = new HTable(conf, tableName);
        LOG.info("Loading HFiles for {} from {}", tableName, tableOutputPath);
        loader.doBulkLoad(tableOutputPath, htable);
        LOG.info("Incremental load complete for table=" + tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TargetTableRef(org.apache.phoenix.mapreduce.bulkload.TargetTableRef) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) HTable(org.apache.hadoop.hbase.client.HTable) HashSet(java.util.HashSet)
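
The HTable constructor used above is deprecated in the HBase 1.x client API. Below is a hedged sketch of the same bulk load through the Connection-based overload; it assumes the doBulkLoad(Path, Admin, Table, RegionLocator) variant of LoadIncrementalHFiles is available, and the method name and parameters are illustrative only.

// A possible non-deprecated variant of the bulk load above (a sketch, not Phoenix's code),
// assuming the doBulkLoad(Path, Admin, Table, RegionLocator) overload is available.
private void bulkLoadViaConnection(Configuration conf, Path tableOutputPath, String tableName) throws Exception {
    try (org.apache.hadoop.hbase.client.Connection hbaseConn = ConnectionFactory.createConnection(conf);
         Admin admin = hbaseConn.getAdmin();
         Table table = hbaseConn.getTable(TableName.valueOf(tableName));
         RegionLocator locator = hbaseConn.getRegionLocator(TableName.valueOf(tableName))) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Load the HFiles under tableOutputPath into the target table.
        loader.doBulkLoad(tableOutputPath, admin, table, locator);
    }
}

The fully qualified org.apache.hadoop.hbase.client.Connection is used here to avoid a clash with java.sql.Connection, which appears in the Phoenix example that follows.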

Example 4 with LoadIncrementalHFiles

use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project phoenix by apache.

the class IndexTool method run.

@Override
public int run(String[] args) throws Exception {
    Connection connection = null;
    HTable htable = null;
    try {
        CommandLine cmdLine = null;
        try {
            cmdLine = parseOptions(args);
        } catch (IllegalStateException e) {
            printHelpAndExit(e.getMessage(), getOptions());
        }
        final Configuration configuration = HBaseConfiguration.addHbaseResources(getConf());
        final String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPTION.getOpt());
        final String dataTable = cmdLine.getOptionValue(DATA_TABLE_OPTION.getOpt());
        final String indexTable = cmdLine.getOptionValue(INDEX_TABLE_OPTION.getOpt());
        final boolean isPartialBuild = cmdLine.hasOption(PARTIAL_REBUILD_OPTION.getOpt());
        final String qDataTable = SchemaUtil.getQualifiedTableName(schemaName, dataTable);
        boolean useDirectApi = cmdLine.hasOption(DIRECT_API_OPTION.getOpt());
        String basePath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt());
        boolean isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
        boolean useSnapshot = cmdLine.hasOption(SNAPSHOT_OPTION.getOpt());
        connection = ConnectionUtil.getInputConnection(configuration);
        byte[][] splitKeysBeforeJob = null;
        boolean isLocalIndexBuild = false;
        PTable pindexTable = null;
        if (indexTable != null) {
            if (!isValidIndexTable(connection, qDataTable, indexTable)) {
                throw new IllegalArgumentException(String.format(" %s is not an index table for %s ", indexTable, qDataTable));
            }
            pindexTable = PhoenixRuntime.getTable(connection, schemaName != null && !schemaName.isEmpty() ? SchemaUtil.getQualifiedTableName(schemaName, indexTable) : indexTable);
            htable = (HTable) connection.unwrap(PhoenixConnection.class).getQueryServices().getTable(pindexTable.getPhysicalName().getBytes());
            if (IndexType.LOCAL.equals(pindexTable.getIndexType())) {
                isLocalIndexBuild = true;
                splitKeysBeforeJob = htable.getRegionLocator().getStartKeys();
            }
        }
        PTable pdataTable = PhoenixRuntime.getTableNoCache(connection, qDataTable);
        Path outputPath = null;
        FileSystem fs = null;
        if (basePath != null) {
            outputPath = CsvBulkImportUtil.getOutputPath(new Path(basePath), pindexTable == null ? pdataTable.getPhysicalName().getString() : pindexTable.getPhysicalName().getString());
            fs = outputPath.getFileSystem(configuration);
            fs.delete(outputPath, true);
        }
        Job job = new JobFactory(connection, configuration, outputPath).getJob(schemaName, indexTable, dataTable, useDirectApi, isPartialBuild, useSnapshot);
        if (!isForeground && useDirectApi) {
            LOG.info("Running Index Build in Background - Submit async and exit");
            job.submit();
            return 0;
        }
        LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!");
        boolean result = job.waitForCompletion(true);
        if (result) {
            if (!useDirectApi && indexTable != null) {
                if (isLocalIndexBuild) {
                    validateSplitForLocalIndex(splitKeysBeforeJob, htable);
                }
                LOG.info("Loading HFiles from {}", outputPath);
                LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
                loader.doBulkLoad(outputPath, htable);
                htable.close();
                // Without direct API, we need to update the index state to ACTIVE from client.
                IndexToolUtil.updateIndexState(connection, qDataTable, indexTable, PIndexState.ACTIVE);
                fs.delete(outputPath, true);
            }
            return 0;
        } else {
            LOG.error("IndexTool job failed! Check logs for errors..");
            return -1;
        }
    } catch (Exception ex) {
        LOG.error("An exception occurred while performing the indexing job: " + ExceptionUtils.getMessage(ex) + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    } finally {
        try {
            if (connection != null) {
                connection.close();
            }
            if (htable != null) {
                htable.close();
            }
        } catch (SQLException sqle) {
            LOG.error("Failed to close connection", sqle);
            throw new RuntimeException("Failed to close connection");
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) SQLException(java.sql.SQLException) Connection(java.sql.Connection) PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) HTable(org.apache.hadoop.hbase.client.HTable) PTable(org.apache.phoenix.schema.PTable) ParseException(org.apache.commons.cli.ParseException) CommandLine(org.apache.commons.cli.CommandLine) LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job)
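
IndexTool's run(String[]) signature follows Hadoop's Tool interface, so the method above is normally launched through ToolRunner. A minimal driver sketch follows; the schema, table, and output-path values are hypothetical placeholders, and the long option names are assumed to match what parseOptions expects.

// Minimal driver sketch (not Phoenix's own main), assuming IndexTool is a Hadoop Tool
// and that these long option names match the options parsed in parseOptions above.
// All option values are hypothetical placeholders.
public static void main(String[] args) throws Exception {
    String[] toolArgs = {
        "--schema", "MY_SCHEMA",
        "--data-table", "MY_TABLE",
        "--index-table", "MY_INDEX",
        "--output-path", "/tmp/my_index_hfiles"
    };
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new IndexTool(), toolArgs);
    System.exit(exitCode);
}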

Example 5 with LoadIncrementalHFiles

use of org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles in project hbase by apache.

the class RestoreTool method createLoader.

/**
   * Create a {@link LoadIncrementalHFiles} instance to be used to restore the HFiles of a full
   * backup.
   * @return the {@link LoadIncrementalHFiles} instance
   * @throws IOException if the loader cannot be created
   */
private LoadIncrementalHFiles createLoader(Path tableArchivePath, boolean multipleTables) throws IOException {
    // By default the limit is 32, and the loader will fail if the number of files in any
    // region exceeds it. That is bad for snapshot restore.
    this.conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
    this.conf.set(LoadIncrementalHFiles.IGNORE_UNMATCHED_CF_CONF_KEY, "yes");
    LoadIncrementalHFiles loader = null;
    try {
        loader = new LoadIncrementalHFiles(this.conf);
    } catch (Exception e1) {
        throw new IOException(e1);
    }
    return loader;
}
Also used : LoadIncrementalHFiles(org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException)

Aggregations

LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) 9
IOException (java.io.IOException) 6
Path (org.apache.hadoop.fs.Path) 6
Configuration (org.apache.hadoop.conf.Configuration) 4
FileNotFoundException (java.io.FileNotFoundException) 3
FileSystem (org.apache.hadoop.fs.FileSystem) 3
HTable (org.apache.hadoop.hbase.client.HTable) 3
Job (org.apache.hadoop.mapreduce.Job) 3
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 2
Connection (java.sql.Connection) 1
SQLException (java.sql.SQLException) 1
HashSet (java.util.HashSet) 1
CommandLine (org.apache.commons.cli.CommandLine) 1
ParseException (org.apache.commons.cli.ParseException) 1
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor) 1
HBackupFileSystem (org.apache.hadoop.hbase.backup.HBackupFileSystem) 1
Connection (org.apache.hadoop.hbase.client.Connection) 1
HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem) 1
SnapshotDescription (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription) 1
SnapshotManifest (org.apache.hadoop.hbase.snapshot.SnapshotManifest) 1