
Example 41 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class Import, method createSubmittableJob.

/**
   * Sets up the actual job.
   * @param conf The current configuration.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJarsForClasses(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
    if (hfileOutPath != null && conf.getBoolean(HAS_LARGE_RESULT, false)) {
        LOG.info("Use Large Result!!");
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
            job.setMapperClass(KeyValueSortImporter.class);
            job.setReducerClass(KeyValueReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(KeyValueWritableComparable.class);
            job.setMapOutputValueClass(KeyValue.class);
            job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class", KeyValueWritableComparable.KeyValueWritableComparator.class, RawComparator.class);
            Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
            FileSystem fs = FileSystem.get(job.getConfiguration());
            fs.deleteOnExit(partitionsPath);
            job.setPartitionerClass(KeyValueWritableComparablePartitioner.class);
            job.setNumReduceTasks(regionLocator.getStartKeys().length);
            TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), com.google.common.base.Preconditions.class);
        }
    } else if (hfileOutPath != null) {
        LOG.info("writing to hfiles for bulk load.");
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
            TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), com.google.common.base.Preconditions.class);
        }
    } else {
        LOG.info("writing directly to table from Mapper.");
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}
Also used: Path (org.apache.hadoop.fs.Path), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), Table (org.apache.hadoop.hbase.client.Table), ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), KeyValue (org.apache.hadoop.hbase.KeyValue), Connection (org.apache.hadoop.hbase.client.Connection), IOException (java.io.IOException), InvocationTargetException (java.lang.reflect.InvocationTargetException), ZooKeeperConnectionException (org.apache.hadoop.hbase.ZooKeeperConnectionException), KeeperException (org.apache.zookeeper.KeeperException), TableName (org.apache.hadoop.hbase.TableName), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job)
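
For context, a minimal driver sketch showing how the job returned by createSubmittableJob would typically be submitted. The ImportDriver class name and the two-argument contract (table name, then input directory) are illustrative assumptions, not part of the HBase source above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.Import;
import org.apache.hadoop.mapreduce.Job;

public class ImportDriver {
    public static void main(String[] args) throws Exception {
        // args[0] = table name, args[1] = input directory, mirroring createSubmittableJob's contract.
        Configuration conf = HBaseConfiguration.create();
        Job job = Import.createSubmittableJob(conf, args);
        // Submit the configured job and block until it completes.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}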

Example 42 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class RegionSplitter, method getSplits.

static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection, TableName tableName, SplitAlgorithm splitAlgo) throws IOException {
    Pair<Path, Path> tableDirAndSplitFile = getTableDirAndSplitFile(connection.getConfiguration(), tableName);
    Path tableDir = tableDirAndSplitFile.getFirst();
    Path splitFile = tableDirAndSplitFile.getSecond();
    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
    // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();
    // Does a split file exist?
    if (!fs.exists(splitFile)) {
        // NO = fresh start. calculate splits to make
        LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");
        // Query meta for all regions in the table
        Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
        Pair<byte[][], byte[][]> tmp = null;
        try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
            tmp = regionLocator.getStartEndKeys();
        }
        Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length, "Start and End rows should be equivalent");
        for (int i = 0; i < tmp.getFirst().length; ++i) {
            byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
            if (start.length == 0)
                start = splitAlgo.firstRow();
            if (end.length == 0)
                end = splitAlgo.lastRow();
            rows.add(Pair.newPair(start, end));
        }
        LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");
        // prepare the split file
        Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
        FSDataOutputStream tmpOut = fs.create(tmpFile);
        // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
        for (Pair<byte[], byte[]> r : rows) {
            byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
            String startStr = splitAlgo.rowToStr(r.getFirst());
            String splitStr = splitAlgo.rowToStr(splitPoint);
            daughterRegions.add(Pair.newPair(startStr, splitStr));
            LOG.debug("Will Split [" + startStr + " , " + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
            tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr + "\n");
        }
        tmpOut.close();
        fs.rename(tmpFile, splitFile);
    } else {
        LOG.debug("_balancedSplit file found. Replay log to restore state...");
        FSUtils.getInstance(fs, connection.getConfiguration()).recoverFileLease(fs, splitFile, connection.getConfiguration(), null);
        // parse split file and process remaining splits
        FSDataInputStream tmpIn = fs.open(splitFile);
        StringBuilder sb = new StringBuilder(tmpIn.available());
        while (tmpIn.available() > 0) {
            sb.append(tmpIn.readChar());
        }
        tmpIn.close();
        for (String line : sb.toString().split("\n")) {
            String[] cmd = line.split(splitAlgo.separator());
            Preconditions.checkArgument(3 == cmd.length);
            byte[] start = splitAlgo.strToRow(cmd[1]);
            String startStr = splitAlgo.rowToStr(start);
            byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
            String splitStr = splitAlgo.rowToStr(splitPoint);
            Pair<String, String> r = Pair.newPair(startStr, splitStr);
            if (cmd[0].equals("+")) {
                LOG.debug("Adding: " + r);
                daughterRegions.add(r);
            } else {
                LOG.debug("Removing: " + r);
                Preconditions.checkArgument(cmd[0].equals("-"), "Unknown option: " + cmd[0]);
                Preconditions.checkState(daughterRegions.contains(r), "Missing row: " + r);
                daughterRegions.remove(r);
            }
        }
        LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
    }
    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
    for (Pair<String, String> r : daughterRegions) {
        ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()), splitAlgo.strToRow(r.getSecond())));
    }
    return ret;
}
Also used: Path (org.apache.hadoop.fs.Path), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), FileSystem (org.apache.hadoop.fs.FileSystem), HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)
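
The core RegionLocator call here is getStartEndKeys(), which returns the current region boundaries that getSplits then turns into split points. A self-contained sketch that only enumerates those boundaries; the RegionBoundaryDump class name and command-line argument are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

public class RegionBoundaryDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        TableName tableName = TableName.valueOf(args[0]);
        try (Connection connection = ConnectionFactory.createConnection(conf);
             RegionLocator locator = connection.getRegionLocator(tableName)) {
            // Parallel arrays of start and end keys, one entry per region of the table.
            Pair<byte[][], byte[][]> keys = locator.getStartEndKeys();
            for (int i = 0; i < keys.getFirst().length; i++) {
                System.out.println("region " + i + ": ["
                        + Bytes.toStringBinary(keys.getFirst()[i]) + ", "
                        + Bytes.toStringBinary(keys.getSecond()[i]) + ")");
            }
        }
    }
}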

Example 43 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHBaseFsckOneRS, method testLingeringSplitParent.

/**
   * A split parent in meta, in hdfs, and not deployed
   */
@Test(timeout = 180000)
public void testLingeringSplitParent() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    Table meta = null;
    try {
        setupTable(tableName);
        assertEquals(ROWKEYS.length, countRows());
        // make sure data in regions, if in wal only there is no data loss
        admin.flush(tableName);
        HRegionLocation location;
        try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
            location = rl.getRegionLocation(Bytes.toBytes("B"));
        }
        // Delete one region from meta, but not hdfs, unassign it.
        deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, true, false);
        // Create a new meta entry to fake it as a split parent.
        meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
        HRegionInfo hri = location.getRegionInfo();
        HRegionInfo a = new HRegionInfo(tbl.getName(), Bytes.toBytes("B"), Bytes.toBytes("BM"));
        HRegionInfo b = new HRegionInfo(tbl.getName(), Bytes.toBytes("BM"), Bytes.toBytes("C"));
        hri.setOffline(true);
        hri.setSplit(true);
        MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
        meta.close();
        admin.flush(TableName.META_TABLE_NAME);
        HBaseFsck hbck = doFsck(conf, false);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
        // regular repair cannot fix lingering split parent
        hbck = doFsck(conf, true);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
        assertFalse(hbck.shouldRerun());
        hbck = doFsck(conf, false);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
        // fix lingering split parent
        hbck = new HBaseFsck(conf, hbfsckExecutorService);
        hbck.connect();
        // i.e. -details
        HBaseFsck.setDisplayFullReport();
        hbck.setTimeLag(0);
        hbck.setFixSplitParents(true);
        hbck.onlineHbck();
        assertTrue(hbck.shouldRerun());
        hbck.close();
        Get get = new Get(hri.getRegionName());
        Result result = meta.get(get);
        assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER).isEmpty());
        assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER).isEmpty());
        admin.flush(TableName.META_TABLE_NAME);
        // fix other issues
        doFsck(conf, true);
        // check that all are fixed
        assertNoErrors(doFsck(conf, false));
        assertEquals(ROWKEYS.length, countRows());
    } finally {
        cleanupTable(tableName);
        IOUtils.closeQuietly(meta);
    }
}
Also used: HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), TableName (org.apache.hadoop.hbase.TableName), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), Table (org.apache.hadoop.hbase.client.Table), HRegionLocation (org.apache.hadoop.hbase.HRegionLocation), Get (org.apache.hadoop.hbase.client.Get), Result (org.apache.hadoop.hbase.client.Result), Test (org.junit.Test)
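
Before faking the split parent, the test resolves a row key to its hosting region with getRegionLocation. A standalone sketch of that lookup against a live cluster; the RowLocationLookup class name and command-line arguments are assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.Bytes;

public class RowLocationLookup {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             RegionLocator locator = connection.getRegionLocator(TableName.valueOf(args[0]))) {
            // Locate the region that currently hosts the given row key.
            HRegionLocation location = locator.getRegionLocation(Bytes.toBytes(args[1]));
            System.out.println("row " + args[1] + " -> region "
                    + location.getRegionInfo().getRegionNameAsString()
                    + " on " + location.getServerName());
        }
    }
}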

Example 44 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHBaseFsckOneRS, method testHbckAfterRegionMerge.

@Test(timeout = 180000)
public void testHbckAfterRegionMerge() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    Table meta = null;
    try {
        // disable CatalogJanitor
        TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
        setupTable(tableName);
        assertEquals(ROWKEYS.length, countRows());
        try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
            // make sure data in regions, if in wal only there is no data loss
            admin.flush(tableName);
            HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
            HRegionInfo region2 = rl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
            int regionCountBeforeMerge = rl.getAllRegionLocations().size();
            assertNotEquals(region1, region2);
            // do a region merge
            admin.mergeRegionsAsync(region1.getEncodedNameAsBytes(), region2.getEncodedNameAsBytes(), false);
            // wait until region merged
            long timeout = System.currentTimeMillis() + 30 * 1000;
            while (true) {
                if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) {
                    break;
                } else if (System.currentTimeMillis() > timeout) {
                    fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2.getEncodedName() + " be merged");
                }
                Thread.sleep(10);
            }
            assertEquals(ROWKEYS.length, countRows());
            HBaseFsck hbck = doFsck(conf, false);
            // no errors
            assertNoErrors(hbck);
        }
    } finally {
        TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
        cleanupTable(tableName);
        IOUtils.closeQuietly(meta);
    }
}
Also used: HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), TableName (org.apache.hadoop.hbase.TableName), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), Table (org.apache.hadoop.hbase.client.Table), Test (org.junit.Test)
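
The wait loop above polls getAllRegionLocations() until the region count drops after the merge. The same idea, extracted into a small reusable helper; a sketch assuming the caller supplies the connection, threshold, and timeout.

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionLocator;

public final class RegionCountWaiter {
    private RegionCountWaiter() {}

    /** Polls the region count until it drops below the threshold or the timeout expires. */
    public static boolean waitForRegionCountBelow(Connection connection, TableName tableName,
            int threshold, long timeoutMillis) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        try (RegionLocator locator = connection.getRegionLocator(tableName)) {
            while (System.currentTimeMillis() < deadline) {
                if (locator.getAllRegionLocations().size() < threshold) {
                    return true;
                }
                TimeUnit.MILLISECONDS.sleep(100);
            }
        }
        return false;
    }
}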

Example 45 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project phoenix by apache.

From the class PhoenixInputFormat, method generateSplits.

private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan, final List<KeyRange> splits, String query) throws IOException {
    Preconditions.checkNotNull(qplan);
    Preconditions.checkNotNull(splits);
    final List<InputSplit> psplits = Lists.newArrayListWithExpectedSize(splits.size());
    Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims().newJobContext(new Job(jobConf)));
    boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false);
    setScanCacheSize(jobConf);
    // Adding Localization
    HConnection connection = HConnectionManager.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf));
    RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan.getTableRef().getTable().getPhysicalName().toString()));
    RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection.getAdmin());
    for (List<Scan> scans : qplan.getScans()) {
        PhoenixInputSplit inputSplit;
        HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow(), false);
        long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName());
        String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);
        if (splitByStats) {
            for (Scan aScan : scans) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Split for  scan : " + aScan + "with scanAttribute : " + aScan.getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" + aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan.getBatch() + "] and  regionLocation : " + regionLocation);
                }
                inputSplit = new PhoenixInputSplit(Lists.newArrayList(aScan), tablePaths[0], regionLocation, regionSize);
                inputSplit.setQuery(query);
                psplits.add(inputSplit);
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans.get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans.size() - 1).getStopRow()));
                LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans.get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " + "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", " + scans.get(0).getBatch() + "] and  regionLocation : " + regionLocation);
                for (int i = 0, limit = scans.size(); i < limit; i++) {
                    LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes.toStringBinary(scans.get(i).getAttribute(BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY)));
                }
            }
            inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, regionSize);
            inputSplit.setQuery(query);
            psplits.add(inputSplit);
        }
    }
    return psplits;
}
Also used: Path (org.apache.hadoop.fs.Path), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), RegionSizeCalculator (org.apache.hadoop.hbase.util.RegionSizeCalculator), HConnection (org.apache.hadoop.hbase.client.HConnection), HRegionLocation (org.apache.hadoop.hbase.HRegionLocation), Scan (org.apache.hadoop.hbase.client.Scan), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapred.InputSplit)
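
generateSplits pairs a RegionLocator with a RegionSizeCalculator so each split can report the size of the region backing its first scan. A minimal sketch of that pairing outside Phoenix, using the newer ConnectionFactory API rather than HConnectionManager; the RegionSizeReport class name and command-line argument are assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.util.RegionSizeCalculator;

public class RegionSizeReport {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin();
             RegionLocator locator = connection.getRegionLocator(TableName.valueOf(args[0]))) {
            // Snapshot of per-region sizes, keyed by region name.
            RegionSizeCalculator sizes = new RegionSizeCalculator(locator, admin);
            for (HRegionLocation location : locator.getAllRegionLocations()) {
                long bytes = sizes.getRegionSize(location.getRegionInfo().getRegionName());
                System.out.println(location.getRegionInfo().getRegionNameAsString()
                        + " ~ " + bytes + " bytes on " + location.getServerName());
            }
        }
    }
}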

Aggregations

RegionLocator (org.apache.hadoop.hbase.client.RegionLocator): 84 usages
Table (org.apache.hadoop.hbase.client.Table): 59 usages
Test (org.junit.Test): 49 usages
TableName (org.apache.hadoop.hbase.TableName): 39 usages
Admin (org.apache.hadoop.hbase.client.Admin): 33 usages
Path (org.apache.hadoop.fs.Path): 31 usages
HRegionLocation (org.apache.hadoop.hbase.HRegionLocation): 30 usages
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 29 usages
Connection (org.apache.hadoop.hbase.client.Connection): 25 usages
Configuration (org.apache.hadoop.conf.Configuration): 21 usages
IOException (java.io.IOException): 19 usages
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 15 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 14 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 13 usages
ServerName (org.apache.hadoop.hbase.ServerName): 13 usages
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 12 usages
ClusterConnection (org.apache.hadoop.hbase.client.ClusterConnection): 10 usages
Put (org.apache.hadoop.hbase.client.Put): 10 usages
ArrayList (java.util.ArrayList): 9 usages
Result (org.apache.hadoop.hbase.client.Result): 8 usages