Use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
In the class CodeIndexerTest, the method testIndexDocumentsEmptyIssue:
// TODO fix the assert rather than programming by exception
public void testIndexDocumentsEmptyIssue() {
    try {
        CodeIndexDocument cid = new CodeIndexDocument("repoLocationRepoNameLocationFilename", "", "fileName", "fileLocation", "fileLocationFilename", "md5hash", "languageName", 0, null, "repoRemoteLocation", "codeOwner");
        Queue queue = new ConcurrentArrayQueue<CodeIndexDocument>();
        queue.add(cid);
        Singleton.getCodeIndexer().indexDocuments(queue);
    } catch (Exception ex) {
        assertTrue(false);
    }
}
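One way the TODO above could be addressed, as a minimal sketch: the surrounding tests already use AssertJ, and assuming a version that provides assertThatCode (3.7.0 or later, with a static import of org.assertj.core.api.Assertions.assertThatCode), the try/catch and assertTrue(false) can be replaced so a failure reports the underlying exception instead of a bare assertion error.
public void testIndexDocumentsEmptyIssue() {
    CodeIndexDocument cid = new CodeIndexDocument("repoLocationRepoNameLocationFilename", "", "fileName", "fileLocation", "fileLocationFilename", "md5hash", "languageName", 0, null, "repoRemoteLocation", "codeOwner");
    Queue<CodeIndexDocument> queue = new ConcurrentArrayQueue<>();
    queue.add(cid);
    // Fails with the underlying exception and its message rather than assertTrue(false)
    assertThatCode(() -> Singleton.getCodeIndexer().indexDocuments(queue)).doesNotThrowAnyException();
}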
Use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
In the class CodeSearcherTest, the method testGetProjectStats:
public void testGetProjectStats() throws IOException {
    CodeIndexDocument codeIndexDocument = new CodeIndexDocument("/", "testGetRepoDocuments", "/", "/", "/", "md5hash", "Java", 10, "", "/", "/");
    Singleton.getCodeIndexer().indexDocument(codeIndexDocument);
    CodeSearcher cs = new CodeSearcher();
    ProjectStats projectStats = cs.getProjectStats("testGetRepoDocuments");
    assertThat(projectStats.getTotalFiles()).isEqualTo(1);
    assertThat(projectStats.getTotalCodeLines()).isEqualTo(10);
    assertThat(projectStats.getCodeFacetLanguages().get(0).getLanguageName()).isEqualTo("Java");
    assertThat(projectStats.getCodeFacetLanguages().get(0).getCount()).isEqualTo(1);
    assertThat(projectStats.getRepoFacetOwner().get(0).getOwner()).isEqualTo("/");
    assertThat(projectStats.getRepoFacetOwner().get(0).getCount()).isEqualTo(1);
    assertThat(projectStats.getCodeByLines().size()).isEqualTo(1);
}
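The CodeIndexDocument constructor calls in these examples are positional. Reading the argument names used as literal values in the first example above, they appear to map as annotated below; this is an inference from the tests rather than a documented signature.
CodeIndexDocument codeIndexDocument = new CodeIndexDocument(
        "/",                     // repoLocationRepoNameLocationFilename - unique path used as the index key
        "testGetRepoDocuments",  // repoName - matched by getProjectStats above
        "/",                     // fileName
        "/",                     // fileLocation
        "/",                     // fileLocationFilename
        "md5hash",               // md5 hash of the file contents
        "Java",                  // languageName - drives the language facet assertions
        10,                      // number of code lines - drives getTotalCodeLines()
        "",                      // contents
        "/",                     // repoRemoteLocation
        "/");                    // codeOwner - drives the repo owner facet assertions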
Use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
In the class CodeIndexerTest, the method testBuildDocument:
public void testBuildDocument() {
    CodeIndexer codeIndexer = new CodeIndexer();
    Document indexableFields = codeIndexer.buildDocument(new CodeIndexDocument("repoLocationRepoNameLocationFilename", "repo Name", "fileName", "fileLocation", "fileLocationFilename", "md5hash", "language Name", 10, "contents", "repoRemoteLocation", "code Owner"));
    assertThat(indexableFields.getFields().size()).isEqualTo(16);
    IndexableField[] fields = indexableFields.getFields(Values.REPONAME);
    assertThat(fields[0].stringValue()).isEqualTo("repo_Name");
    fields = indexableFields.getFields(Values.LANGUAGENAME);
    assertThat(fields[0].stringValue()).isEqualTo("language_Name");
    fields = indexableFields.getFields(Values.CODEOWNER);
    assertThat(fields[0].stringValue()).isEqualTo("code_Owner");
    // Verifies that we ran through the pipeline
    fields = indexableFields.getFields(Values.CONTENTS);
    assertThat(fields[0].stringValue()).isEqualTo(" filename filename filename filename filename filename file name filelocationfilename filelocation contents contents contents contents contents contents");
}
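What the expected values imply (an inference from the assertions, not from the buildDocument implementation itself): multi-word repository, language and owner names have their spaces replaced with underscores so each is stored as a single value ("repo Name" becomes "repo_Name"), and the CONTENTS field is a lower-cased bag of terms built from the file name (repeated several times, presumably to boost it in ranking), the split file name, the location fields and the original contents.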
Use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
In the class IndexBaseRepoJob, the method indexDocsByDelta:
/**
 * Indexes all of the changed documents in the repository, effectively performing a delta update.
 * Should only be called when there is a genuine update, i.e. something was indexed previously
 * and has a new commit.
 */
public void indexDocsByDelta(Path path, String repoName, String repoLocations, String repoRemoteLocation, RepositoryChanged repositoryChanged) {
    // Should have data object by this point
    SearchcodeLib scl = Singleton.getSearchCodeLib();
    Queue<CodeIndexDocument> codeIndexDocumentQueue = Singleton.getCodeIndexQueue();
    String fileRepoLocations = FilenameUtils.separatorsToUnix(repoLocations);
    // Used to hold the reports of what was indexed
    List<String[]> reportList = new ArrayList<>();
    for (String changedFile : repositoryChanged.getChangedFiles()) {
        if (this.shouldJobPauseOrTerminate()) {
            return;
        }
        if (Singleton.getDataService().getPersistentDelete().contains(repoName)) {
            return;
        }
        String[] split = changedFile.split("/");
        String fileName = split[split.length - 1];
        changedFile = fileRepoLocations + "/" + repoName + "/" + changedFile;
        changedFile = changedFile.replace("//", "/");
        CodeLinesReturn codeLinesReturn = this.getCodeLines(changedFile, reportList);
        if (codeLinesReturn.isError()) {
            break;
        }
        IsMinifiedReturn isMinified = this.getIsMinified(codeLinesReturn.getCodeLines(), fileName, reportList);
        if (isMinified.isMinified()) {
            break;
        }
        if (this.checkIfEmpty(codeLinesReturn.getCodeLines(), changedFile, reportList)) {
            break;
        }
        if (this.determineBinary(changedFile, fileName, codeLinesReturn.getCodeLines(), reportList)) {
            break;
        }
        String md5Hash = this.getFileMd5(changedFile);
        String languageName = Singleton.getFileClassifier().languageGuesser(changedFile, codeLinesReturn.getCodeLines());
        String fileLocation = this.getRelativeToProjectPath(path.toString(), changedFile);
        String fileLocationFilename = changedFile.replace(fileRepoLocations, Values.EMPTYSTRING);
        String repoLocationRepoNameLocationFilename = changedFile;
        String newString = this.getBlameFilePath(fileLocationFilename);
        String codeOwner = this.getCodeOwner(codeLinesReturn.getCodeLines(), newString, repoName, fileRepoLocations, scl);
        if (this.LOWMEMORY) {
            try {
                Singleton.getCodeIndexer().indexDocument(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLinesReturn.getCodeLines().size(), StringUtils.join(codeLinesReturn.getCodeLines(), " "), repoRemoteLocation, codeOwner));
            } catch (IOException ex) {
                Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + "\n with message: " + ex.getMessage());
            }
        } else {
            this.sharedService.incrementCodeIndexLinesCount(codeLinesReturn.getCodeLines().size());
            codeIndexDocumentQueue.add(new CodeIndexDocument(repoLocationRepoNameLocationFilename, repoName, fileName, fileLocation, fileLocationFilename, md5Hash, languageName, codeLinesReturn.getCodeLines().size(), StringUtils.join(codeLinesReturn.getCodeLines(), " "), repoRemoteLocation, codeOwner));
        }
        if (this.LOGINDEXED) {
            reportList.add(new String[] { changedFile, "included", "" });
        }
    }
    if (this.LOGINDEXED && reportList.isEmpty() == false) {
        this.logIndexed(repoName + "_delta", reportList);
    }
    for (String deletedFile : repositoryChanged.getDeletedFiles()) {
        deletedFile = fileRepoLocations + "/" + repoName + "/" + deletedFile;
        deletedFile = deletedFile.replace("//", "/");
        Singleton.getLogger().info("Missing from disk, removing from index " + deletedFile);
        try {
            Singleton.getCodeIndexer().deleteByCodeId(DigestUtils.sha1Hex(deletedFile));
        } catch (IOException ex) {
            Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + " indexDocsByDelta deleteByFileLocationFilename for " + repoName + " " + deletedFile + "\n with message: " + ex.getMessage());
        }
    }
}
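The delete loop above implies that a document's code id is the SHA-1 hex digest of its normalised repoLocationRepoNameLocationFilename path. A minimal sketch of removing a single file from the index using only the calls shown above; the path here is made up for illustration.
String deletedFile = ("./repo/" + "myrepo" + "/" + "src/Old.java").replace("//", "/"); // hypothetical path
try {
    Singleton.getCodeIndexer().deleteByCodeId(DigestUtils.sha1Hex(deletedFile));
} catch (IOException ex) {
    Singleton.getLogger().warning("Unable to remove " + deletedFile + " from the index: " + ex.getMessage());
}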
Use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
In the class CodeIndexer, the method indexDocuments:
/**
 * Given a queue of documents to index, indexes them by popping from the queue, limited to a default of 1000 items per call.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
    Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            Document doc = this.buildDocument(codeIndexDocument);
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc));
            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        try {
            writer.close();
        } finally {
            taxonomyWriter.close();
        }
        Singleton.getLogger().info("Closing writers");
    }
}
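A minimal sketch of how a consumer might drain the shared queue with this method. The method name drainIndexQueue, the running flag and the loop-and-sleep scheduling are illustrative only, not the project's actual job wiring; the real callers are the indexer jobs that feed Singleton.getCodeIndexQueue().
private void drainIndexQueue() {
    Queue<CodeIndexDocument> queue = Singleton.getCodeIndexQueue();
    while (this.running) { // 'running' is a hypothetical shutdown flag
        try {
            if (!queue.isEmpty()) {
                // Each call pops at most INDEX_QUEUE_BATCH_SIZE documents, so a large
                // backlog is committed to the index in bounded batches.
                Singleton.getCodeIndexer().indexDocuments(queue);
            }
            Thread.sleep(1000);
        } catch (IOException ex) {
            Singleton.getLogger().warning("Indexing batch failed: " + ex.getMessage());
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            break;
        }
    }
}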