Search in sources :

Example 1 with WorkPool

use of io.zulia.client.pool.WorkPool in project zuliasearch by zuliaio.

In the class ZuliaCmdUtil, the method index.

/**
 * Indexes every record of a line-delimited records file and, when present, restores the
 * associated files shipped alongside it in a per-record zip archive.
 *
 * <p>Each line of {@code recordsFilename} is parsed with {@code Document.parse} and handed to a
 * local {@link WorkPool} task. The task resolves the record id (from {@code idField}, falling back
 * to {@code "id"}), stamps {@code indexTime}, and stores the document through {@code workPool}.
 * If {@code inputDir/<id with '/' replaced by '_'>.zip} exists, it is extracted into a uniquely
 * named temp directory; every sub-directory of that temp directory is expected to hold one
 * associated file plus an optional {@code *_metadata.json}, which are stored as an associated
 * document. The temp directory is always removed afterwards.
 *
 * <p>Failures inside a task are logged and do not stop the remaining records; a failed record is
 * not counted.
 *
 * @param inputDir directory containing the optional {@code <id>.zip} archives
 * @param recordsFilename path of the file holding one record per line
 * @param idField name of the field holding the record id; may be {@code null}, in which case
 *        only {@code "id"} is consulted
 * @param index name of the target index
 * @param workPool client pool used to store documents and associated files
 * @param count shared counter incremented once per successfully processed record
 * @param threads number of worker threads for the local pool
 * @param skipExistingFiles when {@code true}, an associated file is only stored if
 *        {@code fileExists} reports it missing; {@code null} is treated as {@code false}
 * @throws Exception if the records file cannot be read or the local pool fails to shut down
 */
public static void index(String inputDir, String recordsFilename, String idField, String index, ZuliaWorkPool workPool, AtomicInteger count, Integer threads, Boolean skipExistingFiles) throws Exception {
    // Resolve the boxed flag once; a null flag previously caused an unboxing NPE inside every task.
    final boolean skipExisting = Boolean.TRUE.equals(skipExistingFiles);
    WorkPool threadPool = new WorkPool(threads);
    try (BufferedReader b = new BufferedReader(new FileReader(recordsFilename))) {
        String line;
        while ((line = b.readLine()) != null) {
            final String record = line;
            threadPool.executeAsync((Callable<Void>) () -> {
                try {
                    Document document = Document.parse(record);
                    String id = null;
                    if (idField != null) {
                        id = document.getString(idField);
                    }
                    if (id == null) {
                        // fall through to just "id"
                        id = document.getString("id");
                    }
                    if (id == null) {
                        // if still null, throw exception
                        throw new RuntimeException("No id for record: " + document.toJson());
                    }
                    document.put("indexTime", new Date());
                    Store store = new Store(id, index);
                    store.setResultDocument(new ResultDocBuilder().setDocument(document));
                    workPool.store(store);
                    // Literal replacement; no regex needed.
                    String fullPathToFile = inputDir + File.separator + id.replace("/", "_") + ".zip";
                    Path zipPath = Paths.get(fullPathToFile);
                    if (Files.exists(zipPath)) {
                        File destDir = new File(inputDir + File.separator + UUID.randomUUID() + "_tempWork");
                        try {
                            byte[] buffer = new byte[1024];
                            try (ZipArchiveInputStream inputStream = new ZipArchiveInputStream(new FileInputStream(zipPath.toFile()))) {
                                ZipArchiveEntry zipEntry;
                                while ((zipEntry = inputStream.getNextZipEntry()) != null) {
                                    decompressZipEntryToDisk(destDir, buffer, inputStream, zipEntry);
                                }
                            }
                            // ensure the file was extractable; if nothing was written there is nothing to restore
                            if (Files.exists(destDir.toPath())) {
                                List<Path> tempFiles;
                                // Files.list holds an open directory handle until the stream is closed.
                                try (var entries = Files.list(destDir.toPath())) {
                                    tempFiles = entries.collect(Collectors.toList());
                                }
                                for (Path path : tempFiles) {
                                    if (path.toFile().isDirectory()) {
                                        try {
                                            List<Path> filesPaths;
                                            try (var entries = Files.list(path)) {
                                                filesPaths = entries.collect(Collectors.toList());
                                            }
                                            Document meta = null;
                                            byte[] associatedBytes = new byte[0];
                                            String filename = null;
                                            for (Path filePath : filesPaths) {
                                                try {
                                                    if (filePath.toFile().getName().endsWith("_metadata.json")) {
                                                        meta = Document.parse(Files.readString(filePath));
                                                    } else {
                                                        associatedBytes = Files.readAllBytes(filePath);
                                                        filename = filePath.toFile().getName();
                                                    }
                                                } catch (Throwable t) {
                                                    // Report the path that actually failed; "filename" may still be null or stale here.
                                                    LOG.log(Level.SEVERE, "Could not restore associated file <" + filePath.getFileName() + ">", t);
                                                }
                                            }
                                            if (!skipExisting || !fileExists(workPool, id, filename, index)) {
                                                storeAssociatedDoc(index, workPool, id, filename, meta, associatedBytes);
                                            }
                                        } catch (Throwable t) {
                                            // Pass the throwable along so the root cause is not lost.
                                            LOG.log(Level.SEVERE, "Could not list the individual files for dir <" + path.getFileName() + ">", t);
                                        }
                                    } else {
                                        LOG.log(Level.SEVERE, "Top level file that shouldn't exist: " + path.getFileName());
                                    }
                                }
                            }
                        } finally {
                            // clean up temp work, even when extraction or restore failed part-way
                            if (Files.exists(destDir.toPath())) {
                                try (var walk = Files.walk(destDir.toPath())) {
                                    // reverse order deletes children before their parent directories
                                    walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
                                } catch (IOException | RuntimeException cleanupFailure) {
                                    // Log instead of rethrowing so a cleanup problem never masks the primary failure.
                                    LOG.log(Level.SEVERE, "Could not clean up temp dir <" + destDir + ">", cleanupFailure);
                                }
                            }
                        }
                    }
                    int i = count.incrementAndGet();
                    if (i % 10000 == 0) {
                        LOG.info("So far indexed <" + i + "> for index <" + index + ">");
                    }
                    return null;
                } catch (Exception e) {
                    LOG.log(Level.SEVERE, e.getMessage(), e);
                    return null;
                }
            });
        }
    } finally {
        threadPool.shutdown();
    }
}
Also used : Path(java.nio.file.Path) WorkPool(io.zulia.client.pool.WorkPool) ZuliaWorkPool(io.zulia.client.pool.ZuliaWorkPool) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) Store(io.zulia.client.command.Store) Document(org.bson.Document) Date(java.util.Date) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) FileReader(java.io.FileReader) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) ResultDocBuilder(io.zulia.doc.ResultDocBuilder)

Example 2 with WorkPool

use of io.zulia.client.pool.WorkPool in project zuliasearch by zuliaio.

In the class ZuliaDump, the method fetchAssociatedDocs.

/**
 * Dumps the associated documents of every given id into
 * {@code outputDir/zuliadump/<index>/<id with '/' replaced by '_'>.zip}.
 *
 * <p>One {@code FetchLargeAssociated} request per id is submitted to a local four-thread
 * {@link WorkPool}; progress is logged every 1000 completed fetches.
 *
 * @param workPool client pool used to fetch the associated documents
 * @param index name of the index to dump from
 * @param outputDir base output directory
 * @param uniqueIds ids whose associated documents are dumped
 * @throws Exception if submitting a task fails
 */
private static void fetchAssociatedDocs(ZuliaWorkPool workPool, String index, String outputDir, Set<String> uniqueIds) throws Exception {
    String zuliaDumpDir = outputDir + File.separator + "zuliadump";
    String indOutputDir = zuliaDumpDir + File.separator + index;
    LOG.info("Starting to dump associated docs for <" + uniqueIds.size() + "> documents.");
    AtomicInteger count = new AtomicInteger(0);
    WorkPool threadPool = new WorkPool(4);
    for (String uniqueId : uniqueIds) {
        threadPool.executeAsync(() -> {
            workPool.fetchLargeAssociated(new FetchLargeAssociated(uniqueId, index, Paths.get(indOutputDir + File.separator + uniqueId.replace("/", "_") + ".zip").toFile()));
            // Capture the incremented value so the logged number is the one that hit the threshold,
            // not whatever another worker has bumped the counter to in the meantime.
            int dumped = count.incrementAndGet();
            if (dumped % 1000 == 0) {
                LOG.info("Associated docs dumped so far: " + dumped);
            }
            return null;
        });
    }
    try {
        // NOTE(review): presumably shutdown() blocks until the queued fetches finish — confirm against WorkPool.
        threadPool.shutdown();
    } catch (Throwable t) {
        LOG.log(Level.SEVERE, "Could not shut down the thread pool.", t);
        System.exit(9);
    }
    // Report completion only after the pool has been shut down, not right after the tasks were queued.
    LOG.info("Finished dumping associated docs for <" + uniqueIds.size() + "> documents.");
}
Also used : WorkPool(io.zulia.client.pool.WorkPool) ZuliaWorkPool(io.zulia.client.pool.ZuliaWorkPool) FetchLargeAssociated(io.zulia.client.command.FetchLargeAssociated) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Aggregations

WorkPool (io.zulia.client.pool.WorkPool)2 ZuliaWorkPool (io.zulia.client.pool.ZuliaWorkPool)2 FetchLargeAssociated (io.zulia.client.command.FetchLargeAssociated)1 Store (io.zulia.client.command.Store)1 ResultDocBuilder (io.zulia.doc.ResultDocBuilder)1 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 List (java.util.List)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry)1 ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)1 Document (org.bson.Document)1