use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
the class CodeIndexer method buildDocument.
/**
 * Builds a Lucene document, ready to be indexed, from the supplied code index document.
 *
 * The document carries the path primary key, facet fields for language, repository and owner
 * (only when those values are not null/blank), the stored metadata fields, an analysed but
 * unstored contents field, and the time the document was built.
 *
 * @param codeIndexDocument the file description and contents to convert
 * @return the populated Lucene document
 */
public Document buildDocument(CodeIndexDocument codeIndexDocument) {
    Document document = new Document();

    // Path is the primary key for documents
    // needs to include repo location, project name and then filepath including file
    Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename(), Field.Store.YES);
    document.add(pathField);

    String languageName = codeIndexDocument.getLanguageName();
    String repoName = codeIndexDocument.getRepoName();
    String codeOwner = codeIndexDocument.getCodeOwner();

    // Facets are only added for values that are actually present
    if (!Singleton.getHelpers().isNullEmptyOrWhitespace(languageName)) {
        document.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, languageName));
    }
    if (!Singleton.getHelpers().isNullEmptyOrWhitespace(repoName)) {
        document.add(new SortedSetDocValuesFacetField(Values.REPONAME, repoName));
    }
    if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeOwner)) {
        document.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeOwner));
    }

    String contents = codeIndexDocument.getContents();
    this.searchcodeLib.addToSpellingCorrector(contents);

    // Every part is separated explicitly. Whether the helpers pad their own output is not
    // visible from here, and without a separator the last token of one part would fuse with
    // the first token of the next.
    StringBuilder indexContents = new StringBuilder();
    indexContents.append(this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getFileName())).append(" ");
    indexContents.append(this.searchcodeLib.splitKeywords(codeIndexDocument.getFileName())).append(" ");
    indexContents.append(codeIndexDocument.getFileLocationFilename()).append(" ");
    indexContents.append(codeIndexDocument.getFileLocation()).append(" ");
    indexContents.append(this.searchcodeLib.splitKeywords(contents)).append(" ");
    indexContents.append(this.searchcodeLib.codeCleanPipeline(contents)).append(" ");
    indexContents.append(this.searchcodeLib.findInterestingKeywords(contents)).append(" ");
    indexContents.append(this.searchcodeLib.findInterestingCharacters(contents));

    // The facet section above treats these three values as possibly null, so the text fields
    // must not dereference them blindly; a missing value is indexed as the empty string.
    String repoNameField = repoName == null ? Values.EMPTYSTRING : repoName.replace(" ", "_");
    String languageNameField = languageName == null ? Values.EMPTYSTRING : languageName.replace(" ", "_");
    String codeOwnerField = codeOwner == null ? Values.EMPTYSTRING : codeOwner.replace(" ", "_");

    document.add(new TextField(Values.REPONAME, repoNameField, Field.Store.YES));
    document.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
    document.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
    document.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
    document.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
    document.add(new TextField(Values.LANGUAGENAME, languageNameField, Field.Store.YES));
    document.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
    // Contents are searchable but not stored
    document.add(new TextField(Values.CONTENTS, indexContents.toString().toLowerCase(), Field.Store.NO));
    document.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
    document.add(new TextField(Values.CODEOWNER, codeOwnerField, Field.Store.YES));
    document.add(new TextField(Values.CODEID, codeIndexDocument.getHash(), Field.Store.YES));

    // Extra metadata in this case when it was last indexed
    document.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));
    return document;
}
use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
the class SearchcodeFileVisitor method visitFile.
/**
 * Handles one file of the repository walk: filters it, gathers its metadata and passes it on
 * for indexing (directly when LOWMEMORY is set, otherwise through the shared index queue).
 *
 * Any exception raised while processing the file is logged as a warning and the walk continues.
 *
 * @param file  the visited file; cast to {@code java.nio.file.Path}
 * @param attrs the file's attributes (not used here)
 * @return TERMINATE when the job should pause/terminate or the repository is listed in the
 *         persistent delete collection, otherwise CONTINUE
 */
@Override
public FileVisitResult visitFile(Object file, BasicFileAttributes attrs) throws IOException {
    try {
        java.nio.file.Path path = (java.nio.file.Path) file;

        // Either condition aborts the whole walk
        if (this.indexBaseRepoJob.shouldJobPauseOrTerminate()
                || Singleton.getDataService().getPersistentDelete().contains(this.repoName)) {
            return FileVisitResult.TERMINATE;
        }

        // Convert Path file to unix style that way everything is easier to reason about
        String parentDirectory = FilenameUtils.separatorsToUnix(path.getParent().toString());
        String unixPath = FilenameUtils.separatorsToUnix(path.toString());
        String baseName = path.getFileName().toString();

        if (this.indexBaseRepoJob.ignoreFile(parentDirectory)) {
            return FileVisitResult.CONTINUE;
        }

        // This needs to be the primary key of the file
        fileLocationsMap.put(unixPath, null);

        IndexBaseRepoJob.CodeLinesReturn codeLinesReturn = this.indexBaseRepoJob.getCodeLines(unixPath, reportList);
        if (codeLinesReturn.isError()) {
            fileLocationsMap.remove(unixPath);
            return FileVisitResult.CONTINUE;
        }

        // NOTE(review): minified and empty files keep their fileLocationsMap entry, unlike the
        // error and binary cases which remove it - confirm this asymmetry is intended.
        IndexBaseRepoJob.IsMinifiedReturn minifiedCheck = this.indexBaseRepoJob.getIsMinified(codeLinesReturn.getCodeLines(), baseName, reportList);
        if (minifiedCheck.isMinified()) {
            return FileVisitResult.CONTINUE;
        }

        if (this.indexBaseRepoJob.checkIfEmpty(codeLinesReturn.getCodeLines(), baseName, reportList)) {
            return FileVisitResult.CONTINUE;
        }

        if (this.indexBaseRepoJob.determineBinary(unixPath, baseName, codeLinesReturn.getCodeLines(), reportList)) {
            fileLocationsMap.remove(unixPath);
            return FileVisitResult.CONTINUE;
        }

        // Gather everything the index document needs
        String md5 = this.indexBaseRepoJob.getFileMd5(unixPath);
        String language = Singleton.getFileClassifier().languageGuesser(baseName, codeLinesReturn.getCodeLines());
        String relativeLocation = this.indexBaseRepoJob.getRelativeToProjectPath(file.toString(), unixPath);
        String locationWithFilename = this.indexBaseRepoJob.getFileLocationFilename(unixPath, fileRepoLocations);
        String blamePath = this.indexBaseRepoJob.getBlameFilePath(locationWithFilename);
        String owner = this.indexBaseRepoJob.getCodeOwner(codeLinesReturn.getCodeLines(), blamePath, this.repoName, fileRepoLocations, Singleton.getSearchCodeLib());

        boolean lowMemory = this.indexBaseRepoJob.LOWMEMORY;
        if (!lowMemory) {
            // Queued documents are counted before they are placed on the queue
            Singleton.getSharedService().incrementCodeIndexLinesCount(codeLinesReturn.getCodeLines().size());
        }

        CodeIndexDocument indexDocument = new CodeIndexDocument(unixPath, this.repoName, baseName, relativeLocation, locationWithFilename, md5, language, codeLinesReturn.getCodeLines().size(), StringUtils.join(codeLinesReturn.getCodeLines(), " "), repoRemoteLocation, owner);

        if (lowMemory) {
            Singleton.getCodeIndexer().indexDocument(indexDocument);
        } else {
            Singleton.getCodeIndexQueue().add(indexDocument);
        }

        if (this.indexBaseRepoJob.LOGINDEXED) {
            reportList.add(new String[] { unixPath, "included", Values.EMPTYSTRING });
        }
    } catch (Exception ex) {
        Singleton.getLogger().warning("ERROR - caught a " + ex.getClass() + " in " + this.getClass() + " indexDocsByPath walkFileTree\n with message: " + ex.getMessage() + " for file " + file.toString() + " in path " + file + " in repo " + this.repoName);
    }

    // Continue at all costs
    return FileVisitResult.CONTINUE;
}
use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
the class CodeIndexer method indexTimeDocuments.
/**
 * Given a queue of documents to index into the time index, index them by polling the queue,
 * limited to INDEX_QUEUE_BATCH_SIZE items per call.
 * This method must be synchronized as we have not added any logic to deal with multiple threads
 * writing to the index.
 * TODO investigate how Lucene deals with multiple writes
 *
 * @param codeIndexDocumentQueue queue to drain; documents left after the batch limit stay queued
 * @throws IOException if the index or facet directories cannot be opened, written or closed
 */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is identical for every document, so build it once
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
    facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
    facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
    facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
    facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
    facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);

    // try-with-resources closes every directory and writer even when opening a later one,
    // indexing, or closing an earlier one fails
    try (Directory dir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
         Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));
         IndexWriter writer = new IndexWriter(dir, iwc);
         TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir)) {
        try {
            // Index all documents and commit at the end for performance gains
            int count = 0;
            CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();

            while (codeIndexDocument != null) {
                Singleton.getLogger().info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
                this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

                Document doc = new Document();

                // Path is the primary key for documents
                // needs to include repo location, project name and then filepath including file and revision
                String pathKey = codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision();
                doc.add(new StringField("path", pathKey, Field.Store.YES));

                // Add in facets, skipping any value that is null/blank
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName())) {
                    doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName()));
                }
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName())) {
                    doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
                }
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner())) {
                    doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
                }
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay())) {
                    // Year-month and year facets are prefixes of the year-month-day value
                    doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay()));
                    doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6)));
                    doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4)));
                }
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision())) {
                    doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
                }
                if (!Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted())) {
                    doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
                }

                String indexContents = Values.EMPTYSTRING;
                indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
                indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
                // Store in spelling corrector
                this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents());
                indexContents = indexContents.toLowerCase();

                doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
                doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
                doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
                doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
                doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
                doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
                doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
                // Contents are searchable but not stored
                doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
                doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
                doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
                doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
                doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES));
                doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
                doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
                doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
                doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));

                // Extra metadata in this case when it was last indexed
                doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));

                // The term must carry the same key that was stored in the path field (including
                // the revision), otherwise it matches nothing and re-indexing adds duplicates
                writer.updateDocument(new Term(Values.PATH, pathKey), facetsConfig.build(taxoWriter, doc));

                count++;
                // Stop polling once the batch limit is reached; the rest stays on the queue
                codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
            }
        } finally {
            Singleton.getLogger().info("Closing writers");
        }
    }
}
use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
the class IndexGitHistoryJob method getRevisionChanges.
/**
 * Diffs the trees of two revisions and sends one time-index document per changed file to the
 * code indexer.
 *
 * Deleted files are indexed from the old revision (old path, old contents, marked "TRUE");
 * every other change type is indexed from the new revision (new path, new contents, marked
 * "FALSE").
 *
 * NOTE(review): the repository name ("thumbor"), md5 hash ("md5hash") and language ("Java")
 * passed to CodeIndexDocument are hard-coded placeholders - confirm before relying on them.
 *
 * @param localRepository repository whose objects are read
 * @param git             git handle used to compute the diff
 * @param oldRevison      the earlier change set
 * @param newRevision     the later change set
 * @throws IOException     if a revision cannot be resolved or read
 * @throws GitAPIException if the diff command fails
 */
public void getRevisionChanges(Repository localRepository, Git git, GitChangeSet oldRevison, GitChangeSet newRevision) throws IOException, GitAPIException {
    ObjectId oldHead = localRepository.resolve(oldRevison.getRevision() + "^{tree}");
    ObjectId newHead = localRepository.resolve(newRevision.getRevision() + "^{tree}");

    List<DiffEntry> entries;
    // The reader is only needed to load the two trees and run the diff; close it afterwards
    try (ObjectReader reader = localRepository.newObjectReader()) {
        CanonicalTreeParser oldTreeIter = new CanonicalTreeParser();
        oldTreeIter.reset(reader, oldHead);
        CanonicalTreeParser newTreeIter = new CanonicalTreeParser();
        newTreeIter.reset(reader, newHead);
        entries = git.diff().setNewTree(newTreeIter).setOldTree(oldTreeIter).call();
    }

    GitService gs = new GitService();
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");

    for (DiffEntry entry : entries) {
        boolean deleted = entry.getChangeType() == DiffEntry.ChangeType.DELETE;

        // A deleted file only exists in the old revision (its new path is /dev/null), so the
        // path, contents, author, date and message all come from the old side; everything
        // else is described by the new side.
        GitChangeSet revision = deleted ? oldRevison : newRevision;
        String path = deleted ? entry.getOldPath() : entry.getNewPath();

        System.out.println((deleted ? "DEL " : "ADD ") + path);

        String contents = gs.fetchFileRevision(localRepository.getWorkTree().toString() + "/.git", revision.getRevision(), path);
        CodeIndexDocument cd = new CodeIndexDocument(path, "thumbor", path, path, path, "md5hash", "Java", contents.split("\\r?\\n").length, contents, "", revision.getAuthor());
        cd.setRevision(revision.getRevision());
        cd.setYearMonthDay(sdf.format(revision.getExpiry()));
        // yyyyMMdd -> yyyyMM and yyyy
        cd.setYearMonth(cd.getYearMonthDay().substring(0, 6));
        cd.setYear(cd.getYearMonthDay().substring(0, 4));
        cd.setMessage(revision.getMessage());
        cd.setDeleted(deleted ? "TRUE" : "FALSE");
        Singleton.getCodeIndexer().indexTimeDocument(cd);
    }
}
use of com.searchcode.app.dto.CodeIndexDocument in project searchcode-server by boyter.
the class CodeSearcherTest method testGetRepoDocuments.
// Integration Test
// Indexes one document for the repo, then expects getRepoDocuments to return one entry for
// argument 0 and none for argument 1 (presumably a page number - verify against CodeSearcher).
public void testGetRepoDocuments() throws IOException {
    CodeIndexDocument document = new CodeIndexDocument("/", "testGetRepoDocuments", "/", "/", "/", "md5hash", "Java", 10, "", "/", "/");
    Singleton.getCodeIndexer().indexDocument(document);

    CodeSearcher searcher = new CodeSearcher();

    List<String> firstResult = searcher.getRepoDocuments("testGetRepoDocuments", 0);
    assertThat(firstResult).hasSize(1);

    List<String> secondResult = searcher.getRepoDocuments("testGetRepoDocuments", 1);
    assertThat(secondResult).hasSize(0);
}
Aggregations