
Example 46 with FileRef

Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

From the class TwoTierCompactionStrategyTest, method testFileSubsetCompaction:

@Test
public void testFileSubsetCompaction() throws IOException {
    ttcs.init(opts);
    conf = DefaultConfiguration.getInstance();
    KeyExtent ke = new KeyExtent(Table.ID.of("0"), null, null);
    mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, conf);
    Map<FileRef, DataFileValue> fileMap = createFileMap("f1", "1G", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "10M", "f6", "10M", "f7", "10M");
    Map<FileRef, DataFileValue> filesToCompactMap = createFileMap("f2", "10M", "f3", "10M", "f4", "10M", "f5", "10M", "f6", "10M", "f7", "10M");
    mcr.setFiles(fileMap);
    Assert.assertTrue(ttcs.shouldCompact(mcr));
    Assert.assertEquals(7, mcr.getFiles().size());
    List<FileRef> filesToCompact = ttcs.getCompactionPlan(mcr).inputFiles;
    Assert.assertEquals(filesToCompactMap.keySet(), new HashSet<>(filesToCompact));
    Assert.assertEquals(6, filesToCompact.size());
    Assert.assertNull(ttcs.getCompactionPlan(mcr).writeParameters.getCompressType());
}
Also used : DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) FileRef(org.apache.accumulo.server.fs.FileRef) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) Test(org.junit.Test)
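Both maps above come from a createFileMap helper that this excerpt does not show. A minimal sketch of such a helper, assuming FileRef accepts a full path string (as in Example 47 below) and DataFileValue takes a size in bytes plus an entry count; the base path and the entry count of 1 are illustrative assumptions:

import java.util.HashMap;
import java.util.Map;
import org.apache.accumulo.core.conf.ConfigurationTypeHelper;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.server.fs.FileRef;

// Builds FileRef -> DataFileValue entries from (name, size) pairs such as
// ("f1", "1G", "f2", "10M", ...), so tests can describe tablet files tersely.
private static Map<FileRef, DataFileValue> createFileMap(String... namesAndSizes) {
    Map<FileRef, DataFileValue> files = new HashMap<>();
    for (int i = 0; i < namesAndSizes.length; i += 2) {
        // Hypothetical base path; the strategy only cares about relative sizes.
        FileRef ref = new FileRef("hdfs://nn1/accumulo/tables/0/t-0001/" + namesAndSizes[i] + ".rf");
        long size = ConfigurationTypeHelper.getFixedMemoryAsBytes(namesAndSizes[i + 1]);
        files.put(ref, new DataFileValue(size, 1));
    }
    return files;
}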

Example 47 with FileRef

Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

From the class ConfigurableCompactionStrategyTest, method testOutputOptions:

// file selection options are adequately tested by ShellServerIT
@Test
public void testOutputOptions() throws Exception {
    MajorCompactionRequest mcr = new MajorCompactionRequest(new KeyExtent(Table.ID.of("1"), null, null), MajorCompactionReason.USER, null);
    Map<FileRef, DataFileValue> files = new HashMap<>();
    files.put(new FileRef("hdfs://nn1/accumulo/tables/1/t-009/F00001.rf"), new DataFileValue(50000, 400));
    mcr.setFiles(files);
    // test setting no output options
    ConfigurableCompactionStrategy ccs = new ConfigurableCompactionStrategy();
    Map<String, String> opts = new HashMap<>();
    ccs.init(opts);
    CompactionPlan plan = ccs.getCompactionPlan(mcr);
    Assert.assertEquals(0, plan.writeParameters.getBlockSize());
    Assert.assertEquals(0, plan.writeParameters.getHdfsBlockSize());
    Assert.assertEquals(0, plan.writeParameters.getIndexBlockSize());
    Assert.assertEquals(0, plan.writeParameters.getReplication());
    Assert.assertNull(plan.writeParameters.getCompressType());
    // test setting all output options
    ccs = new ConfigurableCompactionStrategy();
    CompactionSettings.OUTPUT_BLOCK_SIZE_OPT.put(opts, "64K");
    CompactionSettings.OUTPUT_COMPRESSION_OPT.put(opts, "snappy");
    CompactionSettings.OUTPUT_HDFS_BLOCK_SIZE_OPT.put(opts, "256M");
    CompactionSettings.OUTPUT_INDEX_BLOCK_SIZE_OPT.put(opts, "32K");
    CompactionSettings.OUTPUT_REPLICATION_OPT.put(opts, "5");
    ccs.init(opts);
    plan = ccs.getCompactionPlan(mcr);
    Assert.assertEquals(ConfigurationTypeHelper.getFixedMemoryAsBytes("64K"), plan.writeParameters.getBlockSize());
    Assert.assertEquals(ConfigurationTypeHelper.getFixedMemoryAsBytes("256M"), plan.writeParameters.getHdfsBlockSize());
    Assert.assertEquals(ConfigurationTypeHelper.getFixedMemoryAsBytes("32K"), plan.writeParameters.getIndexBlockSize());
    Assert.assertEquals(5, plan.writeParameters.getReplication());
    Assert.assertEquals("snappy", plan.writeParameters.getCompressType());
}
Also used : CompactionPlan(org.apache.accumulo.tserver.compaction.CompactionPlan) DataFileValue(org.apache.accumulo.core.metadata.schema.DataFileValue) FileRef(org.apache.accumulo.server.fs.FileRef) HashMap(java.util.HashMap) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) MajorCompactionRequest(org.apache.accumulo.tserver.compaction.MajorCompactionRequest) Test(org.junit.Test)
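The expected values in the second half of the test come straight from ConfigurationTypeHelper, which turns human-readable memory strings into byte counts. A quick illustration of the conversions the assertions rely on:

import org.apache.accumulo.core.conf.ConfigurationTypeHelper;

// Suffixes scale by powers of 1024: K, M, G, ...
long dataBlockSize = ConfigurationTypeHelper.getFixedMemoryAsBytes("64K");   // 65536
long hdfsBlockSize = ConfigurationTypeHelper.getFixedMemoryAsBytes("256M");  // 268435456
long indexBlockSize = ConfigurationTypeHelper.getFixedMemoryAsBytes("32K");  // 32768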

Example 48 with FileRef

Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

From the class CollectTabletStats, method reportHdfsBlockLocations:

private static void reportHdfsBlockLocations(List<FileRef> files) throws Exception {
    VolumeManager fs = VolumeManagerImpl.get();
    System.out.println("\t\tFile block report : ");
    for (FileRef file : files) {
        FileStatus status = fs.getFileStatus(file.path());
        if (status.isDirectory()) {
            // assume it is a map file
            status = fs.getFileStatus(new Path(file + "/data"));
        }
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        BlockLocation[] locs = ns.getFileBlockLocations(status, 0, status.getLen());
        System.out.println("\t\t\tBlocks for : " + file);
        for (BlockLocation blockLocation : locs) {
            System.out.printf("\t\t\t\t offset : %,13d  hosts :", blockLocation.getOffset());
            for (String host : blockLocation.getHosts()) {
                System.out.print(" " + host);
            }
            System.out.println();
        }
    }
    System.out.println();
}
Also used : Path(org.apache.hadoop.fs.Path) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) FileStatus(org.apache.hadoop.fs.FileStatus) FileRef(org.apache.accumulo.server.fs.FileRef) FileSystem(org.apache.hadoop.fs.FileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation)
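Given the formats above, the report for a single file spanning two HDFS blocks would look roughly like this (offsets and host names are hypothetical, and the leading whitespace is tabs in the real output):

File block report :
    Blocks for : hdfs://nn1/accumulo/tables/1/t-009/F00001.rf
         offset :             0  hosts : worker1 worker2 worker3
         offset :   134,217,728  hosts : worker2 worker4 worker5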

Example 49 with FileRef

Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

From the class CollectTabletStats, method readFiles:

private static int readFiles(VolumeManager fs, AccumuloConfiguration aconf, List<FileRef> files, KeyExtent ke, String[] columns) throws Exception {
    int count = 0;
    HashSet<ByteSequence> columnSet = createColumnBSS(columns);
    for (FileRef file : files) {
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder().forFile(file.path().toString(), ns, ns.getConf()).withTableConfiguration(aconf).build();
        Range range = new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true);
        reader.seek(range, columnSet, !columnSet.isEmpty());
        while (reader.hasTop() && !range.afterEndKey(reader.getTopKey())) {
            count++;
            reader.next();
        }
        reader.close();
    }
    return count;
}
Also used : FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) FileRef(org.apache.accumulo.server.fs.FileRef) FileSystem(org.apache.hadoop.fs.FileSystem) Range(org.apache.accumulo.core.data.Range) ByteSequence(org.apache.accumulo.core.data.ByteSequence) ArrayByteSequence(org.apache.accumulo.core.data.ArrayByteSequence)
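The createColumnBSS helper referenced on the first line is not part of this excerpt. A minimal sketch, assuming each entry in columns is a column family name to be wrapped as an ArrayByteSequence:

import java.util.HashSet;
import org.apache.accumulo.core.data.ArrayByteSequence;
import org.apache.accumulo.core.data.ByteSequence;

// Wraps column family names so the seek() call above can restrict the read;
// an empty set (with the inclusive flag false) means "read all families".
private static HashSet<ByteSequence> createColumnBSS(String[] columns) {
    HashSet<ByteSequence> columnSet = new HashSet<>();
    for (String column : columns) {
        columnSet.add(new ArrayByteSequence(column));
    }
    return columnSet;
}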

Example 50 with FileRef

Use of org.apache.accumulo.server.fs.FileRef in project accumulo by apache.

From the class CollectTabletStats, method main:

public static void main(String[] args) throws Exception {
    final CollectOptions opts = new CollectOptions();
    final ScannerOpts scanOpts = new ScannerOpts();
    opts.parseArgs(CollectTabletStats.class.getName(), args, scanOpts);
    String[] columnsTmp = new String[] {};
    if (opts.columns != null)
        columnsTmp = opts.columns.split(",");
    final String[] columns = columnsTmp;
    final VolumeManager fs = VolumeManagerImpl.get();
    Instance instance = opts.getInstance();
    final ServerConfigurationFactory sconf = new ServerConfigurationFactory(instance);
    Credentials creds = new Credentials(opts.getPrincipal(), opts.getToken());
    ClientContext context = new ClientContext(instance, creds, sconf.getSystemConfiguration());
    Table.ID tableId = Tables.getTableId(instance, opts.getTableName());
    if (tableId == null) {
        log.error("Unable to find table named {}", opts.getTableName());
        System.exit(-1);
    }
    TreeMap<KeyExtent, String> tabletLocations = new TreeMap<>();
    List<KeyExtent> candidates = findTablets(context, !opts.selectFarTablets, opts.getTableName(), tabletLocations);
    if (candidates.size() < opts.numThreads) {
        System.err.println("ERROR : Unable to find " + opts.numThreads + " " + (opts.selectFarTablets ? "far" : "local") + " tablets");
        System.exit(-1);
    }
    List<KeyExtent> tabletsToTest = selectRandomTablets(opts.numThreads, candidates);
    Map<KeyExtent, List<FileRef>> tabletFiles = new HashMap<>();
    for (KeyExtent ke : tabletsToTest) {
        List<FileRef> files = getTabletFiles(context, ke);
        tabletFiles.put(ke, files);
    }
    System.out.println();
    System.out.println("run location      : " + InetAddress.getLocalHost().getHostName() + "/" + InetAddress.getLocalHost().getHostAddress());
    System.out.println("num threads       : " + opts.numThreads);
    System.out.println("table             : " + opts.getTableName());
    System.out.println("table id          : " + tableId);
    for (KeyExtent ke : tabletsToTest) {
        System.out.println("\t *** Information about tablet " + ke.getUUID() + " *** ");
        System.out.println("\t\t# files in tablet : " + tabletFiles.get(ke).size());
        System.out.println("\t\ttablet location   : " + tabletLocations.get(ke));
        reportHdfsBlockLocations(tabletFiles.get(ke));
    }
    System.out.println("%n*** RUNNING TEST ***%n");
    ExecutorService threadPool = Executors.newFixedThreadPool(opts.numThreads);
    for (int i = 0; i < opts.iterations; i++) {
        ArrayList<Test> tests = new ArrayList<>();
        for (final KeyExtent ke : tabletsToTest) {
            final List<FileRef> files = tabletFiles.get(ke);
            Test test = new Test(ke) {

                @Override
                public int runTest() throws Exception {
                    return readFiles(fs, sconf.getSystemConfiguration(), files, ke, columns);
                }
            };
            tests.add(test);
        }
        runTest("read files", tests, opts.numThreads, threadPool);
    }
    for (int i = 0; i < opts.iterations; i++) {
        ArrayList<Test> tests = new ArrayList<>();
        for (final KeyExtent ke : tabletsToTest) {
            final List<FileRef> files = tabletFiles.get(ke);
            Test test = new Test(ke) {

                @Override
                public int runTest() throws Exception {
                    return readFilesUsingIterStack(fs, sconf, files, opts.auths, ke, columns, false);
                }
            };
            tests.add(test);
        }
        runTest("read tablet files w/ system iter stack", tests, opts.numThreads, threadPool);
    }
    for (int i = 0; i < opts.iterations; i++) {
        ArrayList<Test> tests = new ArrayList<>();
        for (final KeyExtent ke : tabletsToTest) {
            final List<FileRef> files = tabletFiles.get(ke);
            Test test = new Test(ke) {

                @Override
                public int runTest() throws Exception {
                    return readFilesUsingIterStack(fs, sconf, files, opts.auths, ke, columns, true);
                }
            };
            tests.add(test);
        }
        runTest("read tablet files w/ table iter stack", tests, opts.numThreads, threadPool);
    }
    for (int i = 0; i < opts.iterations; i++) {
        ArrayList<Test> tests = new ArrayList<>();
        final Connector conn = opts.getConnector();
        for (final KeyExtent ke : tabletsToTest) {
            Test test = new Test(ke) {

                @Override
                public int runTest() throws Exception {
                    return scanTablet(conn, opts.getTableName(), opts.auths, scanOpts.scanBatchSize, ke.getPrevEndRow(), ke.getEndRow(), columns);
                }
            };
            tests.add(test);
        }
        runTest("read tablet data through accumulo", tests, opts.numThreads, threadPool);
    }
    for (final KeyExtent ke : tabletsToTest) {
        final Connector conn = opts.getConnector();
        threadPool.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    calcTabletStats(conn, opts.getTableName(), opts.auths, scanOpts.scanBatchSize, ke, columns);
                } catch (Exception e) {
                    log.error("Failed to calculate tablet stats.", e);
                }
            }
        });
    }
    threadPool.shutdown();
}
Also used : VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Connector(org.apache.accumulo.core.client.Connector) Instance(org.apache.accumulo.core.client.Instance) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ServerConfigurationFactory(org.apache.accumulo.server.conf.ServerConfigurationFactory) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) FileRef(org.apache.accumulo.server.fs.FileRef) List(java.util.List) ArrayList(java.util.ArrayList) Table(org.apache.accumulo.core.client.impl.Table) ClientOnRequiredTable(org.apache.accumulo.server.cli.ClientOnRequiredTable) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) TreeMap(java.util.TreeMap) IOException(java.io.IOException) ScannerOpts(org.apache.accumulo.core.cli.ScannerOpts) ExecutorService(java.util.concurrent.ExecutorService) Credentials(org.apache.accumulo.core.client.impl.Credentials)
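main() drives its benchmarks through a Test base class whose definition is outside this excerpt. A minimal sketch of the shape the usage implies; the real inner class in CollectTabletStats also coordinates timing across threads, so the reporting below is an assumption:

import org.apache.accumulo.core.data.impl.KeyExtent;

// Each benchmark variant overrides runTest() to read one tablet and report
// how many entries it saw.
abstract static class Test implements Runnable {
    private final KeyExtent ke;

    Test(KeyExtent ke) {
        this.ke = ke;
    }

    public abstract int runTest() throws Exception;

    @Override
    public void run() {
        try {
            // Illustrative only; the real class records per-run timing stats.
            System.out.println("tablet " + ke + " : " + runTest() + " entries");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}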

Aggregations

FileRef (org.apache.accumulo.server.fs.FileRef): 62 usages
DataFileValue (org.apache.accumulo.core.metadata.schema.DataFileValue): 36 usages
Value (org.apache.accumulo.core.data.Value): 17 usages
Key (org.apache.accumulo.core.data.Key): 16 usages
ArrayList (java.util.ArrayList): 15 usages
HashMap (java.util.HashMap): 13 usages
KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent): 13 usages
IOException (java.io.IOException): 12 usages
Test (org.junit.Test): 12 usages
Text (org.apache.hadoop.io.Text): 11 usages
Mutation (org.apache.accumulo.core.data.Mutation): 10 usages
VolumeManager (org.apache.accumulo.server.fs.VolumeManager): 10 usages
Scanner (org.apache.accumulo.core.client.Scanner): 9 usages
PartialKey (org.apache.accumulo.core.data.PartialKey): 9 usages
TreeMap (java.util.TreeMap): 8 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 8 usages
Path (org.apache.hadoop.fs.Path): 8 usages
HashSet (java.util.HashSet): 7 usages
IsolatedScanner (org.apache.accumulo.core.client.IsolatedScanner): 6 usages
ScannerImpl (org.apache.accumulo.core.client.impl.ScannerImpl): 6 usages