SupoSE-SolrIntegration-525.diff
| src/test/java/com/soebes/supose/InitRepository.java (Arbeitskopie) | ||
|---|---|---|
| 29 | 29 |
import java.io.FileInputStream; |
| 30 | 30 |
import java.io.FileNotFoundException; |
| 31 | 31 |
import java.io.IOException; |
| 32 |
import java.net.MalformedURLException; |
|
| 33 |
import java.net.URL; |
|
| 32 | 34 |
|
| 33 | 35 |
import org.apache.log4j.Logger; |
| 34 |
import org.apache.lucene.analysis.Analyzer; |
|
| 35 |
import org.apache.lucene.index.CorruptIndexException; |
|
| 36 |
import org.apache.lucene.index.IndexWriter; |
|
| 36 |
import org.apache.solr.client.solrj.SolrServerException; |
|
| 37 |
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; |
|
| 38 |
import org.testng.annotations.AfterSuite; |
|
| 37 | 39 |
import org.testng.annotations.BeforeSuite; |
| 38 | 40 |
import org.tmatesoft.svn.core.SVNException; |
| 39 | 41 |
import org.tmatesoft.svn.core.SVNURL; |
| ... | ... | |
| 42 | 44 |
import org.tmatesoft.svn.core.wc.SVNWCUtil; |
| 43 | 45 |
import org.tmatesoft.svn.core.wc.admin.SVNAdminClient; |
| 44 | 46 |
|
| 45 |
import com.soebes.supose.index.Index; |
|
| 47 |
import com.soebes.supose.index.IndexerFactory; |
|
| 48 |
import com.soebes.supose.index.Writer; |
|
| 46 | 49 |
import com.soebes.supose.repository.Repository; |
| 47 | 50 |
import com.soebes.supose.scan.ScanRepository; |
| 48 |
import com.soebes.supose.utility.AnalyzerFactory; |
|
| 49 | 51 |
|
| 50 | 52 |
|
| 51 | 53 |
/** |
| ... | ... | |
| 62 | 64 |
private static Logger LOGGER = Logger.getLogger(InitRepository.class); |
| 63 | 65 |
|
| 64 | 66 |
private SVNURL repositoryURL = null; |
| 67 |
private String solrUrl = "http://localhost:8983/solr"; |
|
| 65 | 68 |
|
| 66 |
private static ScanRepository scanRepository = new ScanRepository(); |
|
| 67 | 69 |
|
| 68 | 70 |
/** |
| 69 | 71 |
* The first step is to create a test repository which |
| ... | ... | |
| 73 | 75 |
* @throws FileNotFoundException |
| 74 | 76 |
* @throws SVNException |
| 75 | 77 |
* @throws FileNotFoundException |
| 78 |
* @throws MalformedURLException |
|
| 76 | 79 |
*/ |
| 77 | 80 |
@BeforeSuite |
| 78 |
public void beforeSuite() throws FileNotFoundException, SVNException {
|
|
| 81 |
public void beforeSuite() throws FileNotFoundException, SVNException, MalformedURLException {
|
|
| 79 | 82 |
createRepository(); |
| 80 | 83 |
scanRepos(); |
| 81 | 84 |
} |
| ... | ... | |
| 97 | 100 |
admin.doVerify(new File(getRepositoryDirectory())); |
| 98 | 101 |
} |
| 99 | 102 |
|
| 100 |
public void scanRepos() throws SVNException {
|
|
| 101 |
Index index = new Index (); |
|
| 102 |
//We will create a new one if --create is given on command line |
|
| 103 |
//otherwise we will append to the existing index. |
|
| 104 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
|
| 105 |
index.setAnalyzer(analyzer); |
|
| 106 |
//For the test we always create the index. |
|
| 107 |
index.setCreate(true); |
|
| 108 |
IndexWriter indexWriter = index.createIndexWriter(getIndexDirectory()); |
|
| 103 |
public void scanRepos() throws SVNException, MalformedURLException {
|
|
| 104 |
try {
|
|
| 105 |
// try to ping Solr; if it is up and no exception is thrown, do a scan and store the index using Solr |
|
| 106 |
new CommonsHttpSolrServer(solrUrl).ping(); |
|
| 107 |
scanReposSolr(); |
|
| 108 |
} catch (MalformedURLException e) {
|
|
| 109 |
} catch (SolrServerException e) {
|
|
| 110 |
} catch (IOException e) {
|
|
| 111 |
} |
|
| 109 | 112 |
|
| 113 |
scanReposLucene(); |
|
| 114 |
|
|
| 115 |
} |
|
| 116 |
|
|
| 117 |
private void scanReposLucene() throws SVNException {
|
|
| 118 |
//For the test we always create the index, using LuceneIndexer to store it |
|
| 119 |
Writer indexWriter = IndexerFactory.getIndexWriter(getIndexDirectory(), true); |
|
| 120 |
|
|
| 110 | 121 |
ISVNAuthenticationManager authManager = SVNWCUtil.createDefaultAuthenticationManager( |
| 111 | 122 |
"", |
| 112 | 123 |
"" |
| ... | ... | |
| 115 | 126 |
SVNURL url = SVNURL.fromFile(new File(repositoryDir)); |
| 116 | 127 |
Repository repository = new Repository("file://" + url.getURIEncodedPath(), authManager);
|
| 117 | 128 |
|
| 118 |
scanRepository.setRepository(repository); |
|
| 129 |
ScanRepository luceneScaner = new ScanRepository(); |
|
| 130 |
luceneScaner.setRepository(repository); |
|
| 119 | 131 |
|
| 120 | 132 |
//We start with the revision which is given on the command line. |
| 121 | 133 |
//If it is not given we will start with revision 1. |
| 122 |
scanRepository.setStartRevision(1); |
|
| 134 |
luceneScaner.setStartRevision(1); |
|
| 123 | 135 |
//We will scan the repository to the current HEAD of the repository. |
| 124 |
scanRepository.setEndRevision(-1); |
|
| 136 |
luceneScaner.setEndRevision(-1); |
|
| 125 | 137 |
|
| 126 | 138 |
LOGGER.info("Scanning started.");
|
| 127 |
scanRepository.scan(indexWriter); |
|
| 139 |
luceneScaner.scan(indexWriter); |
|
| 128 | 140 |
LOGGER.info("Scanning ready.");
|
| 129 | 141 |
|
| 130 |
try {
|
|
| 131 |
indexWriter.optimize(); |
|
| 132 |
indexWriter.close(); |
|
| 133 |
} catch (CorruptIndexException e) {
|
|
| 134 |
LOGGER.error("CorruptIndexException: Error during optimization of index: ", e);
|
|
| 135 |
} catch (IOException e) {
|
|
| 136 |
LOGGER.error("IOException: Error during optimization of index: ", e);
|
|
| 137 |
} |
|
| 142 |
indexWriter.optimize(); |
|
| 143 |
indexWriter.close(); |
|
| 144 |
|
|
| 138 | 145 |
} |
| 139 | 146 |
|
| 147 |
private void scanReposSolr() throws SVNException, MalformedURLException {
|
|
| 148 |
ISVNAuthenticationManager authManager = SVNWCUtil.createDefaultAuthenticationManager( |
|
| 149 |
"", |
|
| 150 |
"" |
|
| 151 |
); |
|
| 152 |
String repositoryDir = getRepositoryDirectory(); |
|
| 153 |
SVNURL url = SVNURL.fromFile(new File(repositoryDir)); |
|
| 154 |
Repository repository = new Repository("file://" + url.getURIEncodedPath(), authManager);
|
|
| 155 |
|
|
| 156 |
Writer solrIindexWriter = IndexerFactory.getIndexWriter(new URL("http://localhost:8983/solr"));
|
|
| 157 |
ScanRepository solrScanner = new ScanRepository(); |
|
| 158 |
solrScanner.setRepository(repository); |
|
| 159 |
//We start with the revision which is given on the command line. |
|
| 160 |
//If it is not given we will start with revision 1. |
|
| 161 |
solrScanner.setStartRevision(1); |
|
| 162 |
//We will scan the repository to the current HEAD of the repository. |
|
| 163 |
solrScanner.setEndRevision(-1); |
|
| 164 |
|
|
| 165 |
LOGGER.info("Scanning started.");
|
|
| 166 |
solrScanner.scan(solrIindexWriter); |
|
| 167 |
LOGGER.info("Scanning ready.");
|
|
| 168 |
|
|
| 169 |
solrIindexWriter.optimize(); |
|
| 170 |
solrIindexWriter.close(); |
|
| 171 |
} |
|
| 172 |
|
|
| 173 |
@AfterSuite |
|
| 174 |
public void afterSuite() {
|
|
| 175 |
// do some cleanup |
|
| 176 |
|
|
| 177 |
} |
|
| 178 |
|
|
| 140 | 179 |
} |
| src/test/java/com/soebes/supose/scan/SearchRepositoryGetResultTest.java (Arbeitskopie) | ||
|---|---|---|
| 34 | 34 |
|
| 35 | 35 |
import org.apache.log4j.Logger; |
| 36 | 36 |
import org.apache.lucene.index.CorruptIndexException; |
| 37 |
import org.apache.lucene.index.IndexReader; |
|
| 38 | 37 |
import org.testng.annotations.AfterClass; |
| 39 | 38 |
import org.testng.annotations.BeforeClass; |
| 40 | 39 |
import org.testng.annotations.Test; |
| 41 | 40 |
|
| 42 | 41 |
import com.soebes.supose.FieldNames; |
| 43 | 42 |
import com.soebes.supose.TestBase; |
| 43 |
import com.soebes.supose.index.IndexHelper; |
|
| 44 |
import com.soebes.supose.index.IndexerFactory; |
|
| 45 |
import com.soebes.supose.index.Reader; |
|
| 44 | 46 |
import com.soebes.supose.search.ResultEntry; |
| 45 |
import com.soebes.supose.search.SearchRepository; |
|
| 46 | 47 |
|
| 47 | 48 |
@Test |
| 48 | 49 |
public class SearchRepositoryGetResultTest extends TestBase {
|
| 49 | 50 |
private static Logger LOGGER = Logger.getLogger(SearchRepositoryGetResultTest.class); |
| 50 | 51 |
|
| 51 |
private static SearchRepository searchRepository = new SearchRepository(); |
|
| 52 |
private static Reader indexReader = null; |
|
| 52 | 53 |
|
| 53 | 54 |
@BeforeClass |
| 54 | 55 |
public void beforeClass() {
|
| 55 |
searchRepository.setIndexDirectory(getIndexDirectory()); |
|
| 56 |
indexReader = IndexerFactory.getIndexReader(getIndexDirectory()); |
|
| 56 | 57 |
} |
| 57 | 58 |
@AfterClass |
| 58 | 59 |
public void afterClass() throws IOException {
|
| 59 |
IndexReader reader = searchRepository.getReader(); |
|
| 60 |
reader.close(); |
|
| 60 |
indexReader.close(); |
|
| 61 | 61 |
} |
| 62 | 62 |
|
| 63 | 63 |
public void testQueryForFilenameOnly() {
|
| 64 |
List<ResultEntry> result = searchRepository.getResult("+filename:f1.txt");
|
|
| 64 |
List<ResultEntry> result = indexReader.getResult("+filename:f1.txt");
|
|
| 65 | 65 |
assertEquals(result.size(), 4); |
| 66 | 66 |
} |
| 67 | 67 |
|
| 68 | 68 |
public void testBug246QueryForFilenameWithHyphenAsterik() {
|
| 69 |
List<ResultEntry> result = searchRepository.getResult("+filename:testEXCEL*.xls");
|
|
| 69 |
List<ResultEntry> result = indexReader.getResult("+filename:testEXCEL*.xls");
|
|
| 70 | 70 |
assertEquals(result.size(), 2); |
| 71 | 71 |
} |
| 72 | 72 |
|
| 73 | 73 |
public void testBug246QueryForFilenameWithHyphenAndDot() {
|
| 74 |
List<ResultEntry> result = searchRepository.getResult("+filename:\"testEXCEL\\-formats.xls\"");
|
|
| 74 |
List<ResultEntry> result = indexReader.getResult("+filename:\"testEXCEL\\-formats.xls\"");
|
|
| 75 | 75 |
assertEquals(result.size(), 1); |
| 76 | 76 |
} |
| 77 | 77 |
|
| 78 | 78 |
public void testBug246QueryForFilenameWithHyphenNotQuoted() {
|
| 79 |
List<ResultEntry> result = searchRepository.getResult("+filename:testEXCEL\\-formats\\.xls");
|
|
| 79 |
List<ResultEntry> result = indexReader.getResult("+filename:testEXCEL\\-formats\\.xls");
|
|
| 80 | 80 |
assertEquals(result.size(), 1); |
| 81 | 81 |
} |
| 82 | 82 |
|
| 83 | 83 |
public void testBug246QueryForFilenameWithDotReplacedByQuestionMark() {
|
| 84 |
List<ResultEntry> result = searchRepository.getResult("+filename:testEXCEL-formats?xls");
|
|
| 84 |
List<ResultEntry> result = indexReader.getResult("+filename:testEXCEL-formats?xls");
|
|
| 85 | 85 |
assertEquals(result.size(), 1); |
| 86 | 86 |
} |
| 87 | 87 |
|
| 88 | 88 |
public void testQueryForFilenameOnlyUppercase() {
|
| 89 |
List<ResultEntry> result = searchRepository.getResult("+filename:F1.txt");
|
|
| 89 |
List<ResultEntry> result = indexReader.getResult("+filename:F1.txt");
|
|
| 90 | 90 |
assertEquals(result.size(), 4); |
| 91 | 91 |
} |
| 92 | 92 |
|
| 93 | 93 |
public void testQueryForFilenameMixedCaseTestPPT() {
|
| 94 |
List<ResultEntry> result = searchRepository.getResult("+filename:testPPT.*");
|
|
| 94 |
List<ResultEntry> result = indexReader.getResult("+filename:testPPT.*");
|
|
| 95 | 95 |
assertEquals(result.size(), 2); |
| 96 | 96 |
} |
| 97 | 97 |
|
| 98 | 98 |
public void testQueryForFilenameLowercaseTestPPT() {
|
| 99 |
List<ResultEntry> result = searchRepository.getResult("+filename:testppt.*");
|
|
| 99 |
List<ResultEntry> result = indexReader.getResult("+filename:testppt.*");
|
|
| 100 | 100 |
assertEquals(result.size(), 2); |
| 101 | 101 |
} |
| 102 | 102 |
|
| 103 | 103 |
public void testQueryForFilenameWithPrefixedWildcardTextFiles() {
|
| 104 |
List<ResultEntry> result = searchRepository.getResult("+filename:*.txt");
|
|
| 104 |
List<ResultEntry> result = indexReader.getResult("+filename:*.txt");
|
|
| 105 | 105 |
assertEquals(result.size(), 8); |
| 106 | 106 |
} |
| 107 | 107 |
|
| 108 | 108 |
public void testQueryForFilenameWithPrefixedWildcardExcelFiles() {
|
| 109 |
List<ResultEntry> result = searchRepository.getResult("+filename:*.xls");
|
|
| 109 |
List<ResultEntry> result = indexReader.getResult("+filename:*.xls");
|
|
| 110 | 110 |
assertEquals(result.size(), 2); |
| 111 | 111 |
} |
| 112 | 112 |
|
| 113 | 113 |
public void testQueryForFilenameWithPrefixedWildcardExcel2007Files() {
|
| 114 |
List<ResultEntry> result = searchRepository.getResult("+filename:*.xlsx");
|
|
| 114 |
List<ResultEntry> result = indexReader.getResult("+filename:*.xlsx");
|
|
| 115 | 115 |
assertEquals(result.size(), 2); |
| 116 | 116 |
} |
| 117 | 117 |
|
| 118 | 118 |
public void testQueryForPathMixedCase() {
|
| 119 |
List<ResultEntry> result = searchRepository.getResult("+path:/*/B_*");
|
|
| 119 |
List<ResultEntry> result = indexReader.getResult("+path:/*/B_*");
|
|
| 120 | 120 |
assertEquals(result.size(), 6); |
| 121 | 121 |
} |
| 122 | 122 |
|
| 123 | 123 |
public void testQueryForPathLowerCase() {
|
| 124 |
List<ResultEntry> result = searchRepository.getResult("+path:/*/b_*");
|
|
| 124 |
List<ResultEntry> result = indexReader.getResult("+path:/*/b_*");
|
|
| 125 | 125 |
assertEquals(result.size(), 6); |
| 126 | 126 |
} |
| 127 | 127 |
|
| 128 | 128 |
public void testQueryForTermForExcelWorksheet() {
|
| 129 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Sample Excel Worksheet\"");
|
|
| 129 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Excel Worksheet\"");
|
|
| 130 | 130 |
assertEquals(result.size(), 2); |
| 131 | 131 |
} |
| 132 | 132 |
|
| 133 | 133 |
public void testQueryForTermForExcelWorksheetCombination() {
|
| 134 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Sample Excel Worksheet\" +filename:*.xls");
|
|
| 134 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Excel Worksheet\" +filename:*.xls");
|
|
| 135 | 135 |
assertEquals(result.size(), 1); |
| 136 | 136 |
} |
| 137 | 137 |
|
| 138 | 138 |
public void testQueryForTermFromWord() {
|
| 139 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Sample Word\"");
|
|
| 139 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Word\"");
|
|
| 140 | 140 |
assertEquals(result.size(), 2); |
| 141 | 141 |
} |
| 142 | 142 |
|
| 143 | 143 |
public void testQueryForTermFromWordCombination() {
|
| 144 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Sample Word\" +filename:*.doc");
|
|
| 144 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Word\" +filename:*.doc");
|
|
| 145 | 145 |
assertEquals(result.size(), 1); |
| 146 | 146 |
} |
| 147 | 147 |
|
| 148 | 148 |
public void testQueryForTermOfPowerPoint() {
|
| 149 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Sample Powerpoint\"");
|
|
| 149 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Powerpoint\"");
|
|
| 150 | 150 |
assertEquals(result.size(), 2); |
| 151 | 151 |
} |
| 152 | 152 |
|
| 153 | 153 |
public void testQueryOpenOfficeODP() {
|
| 154 | 154 |
//Das ist ein Test mit OpenOffice 3.0 auf Windows XP |
| 155 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"OpenOffice 3.0 auf Windows XP\"");
|
|
| 155 |
List<ResultEntry> result = indexReader.getResult("+content:\"OpenOffice 3.0 auf Windows XP\"");
|
|
| 156 | 156 |
assertEquals(result.size(), 1); |
| 157 | 157 |
} |
| 158 | 158 |
|
| ... | ... | |
| 160 | 160 |
//Test Mit OpenOffice |
| 161 | 161 |
//3.0 |
| 162 | 162 |
//Windows XP |
| 163 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"Test Mit OpenOffice 3.0 Windows XP\"");
|
|
| 163 |
List<ResultEntry> result = indexReader.getResult("+content:\"Test Mit OpenOffice 3.0 Windows XP\"");
|
|
| 164 | 164 |
assertEquals(result.size(), 1); |
| 165 | 165 |
} |
| 166 | 166 |
|
| ... | ... | |
| 169 | 169 |
//In OpenOffice |
| 170 | 170 |
//3.0 |
| 171 | 171 |
//Windows XP |
| 172 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"This is a Test In OpenOffice 3.0 Windows XP\"");
|
|
| 172 |
List<ResultEntry> result = indexReader.getResult("+content:\"This is a Test In OpenOffice 3.0 Windows XP\"");
|
|
| 173 | 173 |
assertEquals(result.size(), 1); |
| 174 | 174 |
} |
| 175 | 175 |
|
| 176 | 176 |
public void testQueryArchiveContentsTAR() {
|
| 177 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"This file is contined in a archive\"");
|
|
| 177 |
List<ResultEntry> result = indexReader.getResult("+content:\"This file is contined in a archive\"");
|
|
| 178 | 178 |
assertEquals(result.size(), 1); |
| 179 | 179 |
} |
| 180 | 180 |
|
| 181 | 181 |
public void testQueryArchiveContentsZIP() {
|
| 182 |
List<ResultEntry> result = searchRepository.getResult("+contents:\"This file is contents of a zip archive\"");
|
|
| 182 |
List<ResultEntry> result = indexReader.getResult("+content:\"This file is contents of a zip archive\"");
|
|
| 183 | 183 |
assertEquals(result.size(), 1); |
| 184 | 184 |
} |
| 185 | 185 |
|
| 186 | 186 |
public void testQueryForTagsOfAllKind() {
|
| 187 |
List<ResultEntry> result = searchRepository.getResult("+tag:*");
|
|
| 187 |
List<ResultEntry> result = indexReader.getResult("+tag:*");
|
|
| 188 | 188 |
//This will be 4 entries which are coming from the tag entry |
| 189 | 189 |
//and one entry which is coming from the maventag. |
| 190 | 190 |
assertEquals(result.size(), 7); |
| 191 | 191 |
} |
| 192 | 192 |
|
| 193 | 193 |
public void testQueryForMavenTags() {
|
| 194 |
List<ResultEntry> result = searchRepository.getResult("+maventag:*");
|
|
| 194 |
List<ResultEntry> result = indexReader.getResult("+maventag:*");
|
|
| 195 | 195 |
assertEquals(result.size(), 4); |
| 196 | 196 |
} |
| 197 | 197 |
|
| 198 | 198 |
public void testQueryForTagsOnly() {
|
| 199 |
List<ResultEntry> result = searchRepository.getResult("+tag:* -maventag:* -subversiontag:*");
|
|
| 199 |
List<ResultEntry> result = indexReader.getResult("+tag:* -maventag:* -subversiontag:*");
|
|
| 200 | 200 |
//This has to be result of the tags only. |
| 201 | 201 |
assertEquals(result.size(), 1); |
| 202 | 202 |
} |
| 203 | 203 |
|
| 204 | 204 |
public void testQueryForSubversionTagsOnly() {
|
| 205 |
List<ResultEntry> result = searchRepository.getResult("+subversiontag:*");
|
|
| 205 |
List<ResultEntry> result = indexReader.getResult("+subversiontag:*");
|
|
| 206 | 206 |
//This has to be result into a single entry for the tag. |
| 207 | 207 |
assertEquals(result.size(), 2); |
| 208 | 208 |
} |
| 209 | 209 |
|
| 210 | 210 |
public void testQueryForBranchPath() {
|
| 211 |
List<ResultEntry> result = searchRepository.getResult("+path:*/branches/*");
|
|
| 211 |
List<ResultEntry> result = indexReader.getResult("+path:*/branches/*");
|
|
| 212 | 212 |
assertEquals(result.size(), 7); |
| 213 | 213 |
} |
| 214 | 214 |
|
| 215 | 215 |
public void testQueryForBranches() {
|
| 216 |
List<ResultEntry> result = searchRepository.getResult("+branch:*");
|
|
| 216 |
List<ResultEntry> result = indexReader.getResult("+branch:*");
|
|
| 217 | 217 |
//We have only a single entry here |
| 218 | 218 |
assertEquals(result.size(), 1); |
| 219 | 219 |
} |
| 220 | 220 |
|
| 221 | 221 |
public void testQueryForKind() {
|
| 222 |
List<ResultEntry> result = searchRepository.getResult("+kind:D");
|
|
| 222 |
List<ResultEntry> result = indexReader.getResult("+kind:D");
|
|
| 223 | 223 |
//We expect three entries here |
| 224 | 224 |
assertEquals(result.size(), 3); |
| 225 | 225 |
} |
| 226 | 226 |
|
| 227 | 227 |
public void testQueryForNode() {
|
| 228 |
List<ResultEntry> result = searchRepository.getResult("+node:dir");
|
|
| 228 |
List<ResultEntry> result = indexReader.getResult("+node:dir");
|
|
| 229 | 229 |
//We expect twelve entries here |
| 230 | 230 |
assertEquals(result.size(), 12); |
| 231 | 231 |
} |
| ... | ... | |
| 241 | 241 |
// } |
| 242 | 242 |
|
| 243 | 243 |
public void testQueryForDeletedTag() throws CorruptIndexException, IOException {
|
| 244 |
List<ResultEntry> result = searchRepository.getResult("+path:*/tags/* +kind:d");
|
|
| 244 |
List<ResultEntry> result = indexReader.getResult("+path:*/tags/* +kind:d");
|
|
| 245 | 245 |
assertEquals(result.size(), 1); |
| 246 | 246 |
|
| 247 | 247 |
assertEquals(result.get(0).getFilename().length(), 0, "We have expected to get an empty filename field for a tag which is a directory."); |
| ... | ... | |
| 251 | 251 |
} |
| 252 | 252 |
|
| 253 | 253 |
public void testQueryForSVNProperty() {
|
| 254 |
List<ResultEntry> result = searchRepository.getResult("+svn\\:mergeinfo:*");
|
|
| 254 |
List<ResultEntry> result = indexReader.getResult("+svn\\:mergeinfo:*");
|
|
| 255 | 255 |
assertEquals(result.size(), 3); |
| 256 | 256 |
} |
| 257 | 257 |
|
| 258 | 258 |
public void testQueryForSVNPropertyContent() {
|
| 259 |
List<ResultEntry> result = searchRepository.getResult("+svn\\:mergeinfo:*/branches/*");
|
|
| 259 |
List<ResultEntry> result = indexReader.getResult("+svn\\:mergeinfo:*/branches/*");
|
|
| 260 | 260 |
assertEquals(result.size(), 3); |
| 261 | 261 |
} |
| 262 | 262 |
|
| 263 | 263 |
public void testQueryForSVNPropertyContentFile() {
|
| 264 |
List<ResultEntry> result = searchRepository.getResult("+svn\\:mergeinfo:*/f3.txt\\:*");
|
|
| 264 |
List<ResultEntry> result = indexReader.getResult("+svn\\:mergeinfo:*/f3.txt\\:*");
|
|
| 265 | 265 |
assertEquals(result.size(), 1); |
| 266 | 266 |
} |
| 267 | 267 |
|
| 268 | 268 |
public void testQueryForSVNPropertyContentPath() {
|
| 269 |
List<ResultEntry> result = searchRepository.getResult("+svn\\:mergeinfo:*/B_0.0.2/*");
|
|
| 269 |
List<ResultEntry> result = indexReader.getResult("+svn\\:mergeinfo:*/B_0.0.2/*");
|
|
| 270 | 270 |
assertEquals(result.size(), 1); |
| 271 | 271 |
} |
| 272 | 272 |
|
| ... | ... | |
| 274 | 274 |
* This test is based on issue Bug #215 |
| 275 | 275 |
*/ |
| 276 | 276 |
public void testQueryForREADMEFileIssue215() {
|
| 277 |
List<ResultEntry> result = searchRepository.getResult("+filename:README");
|
|
| 277 |
List<ResultEntry> result = indexReader.getResult("+filename:README");
|
|
| 278 | 278 |
assertEquals(result.size(), 1); |
| 279 | 279 |
assertEquals(result.get(0).getFilename(), "README"); |
| 280 | 280 |
assertEquals(result.get(0).getPath(), "/project1/trunk/"); |
| ... | ... | |
| 283 | 283 |
public void testCallGetterByName() throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
|
| 284 | 284 |
ResultEntry re = new ResultEntry(); |
| 285 | 285 |
re.setAuthor("TestAuthor");
|
| 286 |
re.setRevision("123123123");
|
|
| 287 |
Object result = searchRepository.callGetterByName(re, "revision"); |
|
| 286 |
re.setRevision(new Long(123123123)); |
|
| 287 |
Object result = IndexHelper.callGetterByName(re, "revision"); |
|
| 288 | 288 |
assertEquals(result, re.getRevision()); |
| 289 | 289 |
} |
| 290 | 290 |
} |
| src/test/java/com/soebes/supose/scan/IndexMergeTest.java (Arbeitskopie) | ||
|---|---|---|
| 25 | 25 |
|
| 26 | 26 |
package com.soebes.supose.scan; |
| 27 | 27 |
|
| 28 |
import static org.testng.Assert.assertTrue; |
|
| 29 |
|
|
| 28 | 30 |
import java.io.File; |
| 31 |
import java.util.ArrayList; |
|
| 29 | 32 |
|
| 30 |
import org.apache.lucene.document.Document; |
|
| 31 |
import org.apache.lucene.document.Field; |
|
| 32 |
import org.apache.lucene.index.IndexWriter; |
|
| 33 |
import org.apache.lucene.store.FSDirectory; |
|
| 34 | 33 |
import org.testng.annotations.AfterClass; |
| 35 | 34 |
import org.testng.annotations.Test; |
| 36 | 35 |
|
| 37 |
import com.soebes.supose.index.Index; |
|
| 36 |
import com.soebes.supose.index.IndexerFactory; |
|
| 37 |
import com.soebes.supose.index.Writer; |
|
| 38 |
import com.soebes.supose.search.ResultEntry; |
|
| 38 | 39 |
|
| 39 |
import static org.testng.Assert.*; |
|
| 40 |
|
|
| 41 | 40 |
@Test |
| 42 | 41 |
public class IndexMergeTest {
|
| 43 | 42 |
|
| ... | ... | |
| 60 | 59 |
return (path.delete()); |
| 61 | 60 |
} |
| 62 | 61 |
|
| 63 |
private static void addTokenizedField(Document doc, String fieldName, String value) {
|
|
| 64 |
doc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.ANALYZED)); |
|
| 65 |
} |
|
| 66 |
|
|
| 67 | 62 |
@AfterClass |
| 68 | 63 |
public void afterClass() {
|
| 69 | 64 |
assertTrue(deleteDirectory("index1"), "Something wrong during deletion of index1");
|
| ... | ... | |
| 76 | 71 |
//3. merge the two indexes.. |
| 77 | 72 |
|
| 78 | 73 |
public void testIndex1 () throws Exception {
|
| 79 |
Index index = new Index (); |
|
| 80 |
IndexWriter indexWriter = index.createIndexWriter("index1");
|
|
| 81 |
Document doc = new Document(); |
|
| 82 |
addTokenizedField(doc, "revision", "1"); |
|
| 83 |
addTokenizedField(doc, "revision", "2"); |
|
| 84 |
indexWriter.addDocument(doc); |
|
| 74 |
Writer indexWriter = IndexerFactory.getIndexWriter("index1", true);
|
|
| 75 |
ResultEntry entry = new ResultEntry(); |
|
| 76 |
entry.setRevision(new Long(1)); |
|
| 77 |
indexWriter.addResultEntry(entry); |
|
| 85 | 78 |
indexWriter.close(); |
| 86 | 79 |
} |
| 87 | 80 |
|
| 88 | 81 |
public void testIndex2 () throws Exception {
|
| 89 |
Index index = new Index (); |
|
| 90 |
IndexWriter indexWriter = index.createIndexWriter("index2");
|
|
| 91 |
Document doc = new Document(); |
|
| 92 |
addTokenizedField(doc, "revision", "3"); |
|
| 93 |
addTokenizedField(doc, "revision", "4"); |
|
| 94 |
indexWriter.addDocument(doc); |
|
| 82 |
Writer indexWriter = IndexerFactory.getIndexWriter("index2", true);
|
|
| 83 |
ResultEntry entry = new ResultEntry(); |
|
| 84 |
entry.setRevision(new Long(2)); |
|
| 85 |
indexWriter.addResultEntry(entry); |
|
| 95 | 86 |
indexWriter.close(); |
| 96 | 87 |
} |
| 97 | 88 |
|
| 98 | 89 |
@Test(dependsOnMethods={"testIndex1", "testIndex2"})
|
| 99 | 90 |
public void testMergeIndexes () throws Exception {
|
| 100 |
Index index = new Index (); |
|
| 91 |
Writer indexWriter = IndexerFactory.getIndexWriter("result", true);
|
|
| 101 | 92 |
|
| 102 |
IndexWriter indexWriter = index.createIndexWriter("result");
|
|
| 103 |
|
|
| 104 |
FSDirectory fsDirs[] = { FSDirectory.getDirectory("index1"), FSDirectory.getDirectory("index2") };
|
|
| 93 |
indexWriter.merge("result", new ArrayList<String>() {
|
|
| 94 |
{
|
|
| 95 |
add("index1");
|
|
| 96 |
add("index2");
|
|
| 97 |
} |
|
| 98 |
}); |
|
| 105 | 99 |
|
| 106 |
indexWriter.addIndexesNoOptimize(fsDirs); |
|
| 107 | 100 |
indexWriter.optimize(); |
| 108 | 101 |
indexWriter.close(); |
| 109 | 102 |
} |
| src/test/java/com/soebes/supose/scan/SearchRepositoryGetQueryTest.java (Arbeitskopie) | ||
|---|---|---|
| 26 | 26 |
package com.soebes.supose.scan; |
| 27 | 27 |
|
| 28 | 28 |
import static org.testng.Assert.assertEquals; |
| 29 |
import static org.testng.Assert.*; |
|
| 29 |
import static org.testng.Assert.assertNotNull; |
|
| 30 | 30 |
|
| 31 | 31 |
import java.io.IOException; |
| 32 |
import java.net.MalformedURLException; |
|
| 33 |
import java.net.URL; |
|
| 32 | 34 |
import java.util.List; |
| 33 | 35 |
|
| 34 | 36 |
import org.apache.log4j.Logger; |
| 35 |
import org.apache.lucene.document.Document; |
|
| 36 |
import org.apache.lucene.document.Field; |
|
| 37 | 37 |
import org.apache.lucene.index.CorruptIndexException; |
| 38 |
import org.apache.lucene.index.IndexReader; |
|
| 39 |
import org.apache.lucene.search.TopDocs; |
|
| 38 |
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; |
|
| 40 | 39 |
import org.testng.annotations.AfterClass; |
| 41 |
import org.testng.annotations.BeforeClass; |
|
| 40 |
import org.testng.annotations.Factory; |
|
| 42 | 41 |
import org.testng.annotations.Test; |
| 43 | 42 |
|
| 44 | 43 |
import com.soebes.supose.FieldNames; |
| 45 | 44 |
import com.soebes.supose.TestBase; |
| 46 |
import com.soebes.supose.search.SearchRepository; |
|
| 45 |
import com.soebes.supose.index.IndexerFactory; |
|
| 46 |
import com.soebes.supose.index.Reader; |
|
| 47 |
import com.soebes.supose.search.ResultEntry; |
|
| 47 | 48 |
|
| 48 | 49 |
@Test |
| 49 | 50 |
public class SearchRepositoryGetQueryTest extends TestBase {
|
| 50 | 51 |
private static Logger LOGGER = Logger.getLogger(SearchRepositoryGetQueryTest.class); |
| 51 | 52 |
|
| 52 |
private static SearchRepository searchRepository = new SearchRepository(); |
|
| 53 |
private Reader indexReader = null; |
|
| 54 |
private String solrUrl = "http://localhost:8983/solr"; |
|
| 53 | 55 |
|
| 54 |
@BeforeClass |
|
| 55 |
public void beforeClass() {
|
|
| 56 |
searchRepository.setIndexDirectory(getIndexDirectory()); |
|
| 56 |
public SearchRepositoryGetQueryTest() {
|
|
| 57 |
|
|
| 57 | 58 |
} |
| 59 |
public SearchRepositoryGetQueryTest(Reader indexReader) {
|
|
| 60 |
this.indexReader = indexReader; |
|
| 61 |
} |
|
| 62 |
|
|
| 63 |
@Factory |
|
| 64 |
public Object[] testFactory() {
|
|
| 65 |
Object[] testToRun = null; |
|
| 66 |
try {
|
|
| 67 |
new CommonsHttpSolrServer(solrUrl).ping(); |
|
| 68 |
|
|
| 69 |
testToRun = new Object[2]; |
|
| 70 |
testToRun[0] = new SearchRepositoryGetQueryTest(IndexerFactory.getIndexReader(new URL(solrUrl))); |
|
| 71 |
testToRun[1] = new SearchRepositoryGetQueryTest(IndexerFactory.getIndexReader(getIndexDirectory())); |
|
| 72 |
} catch(Exception e) {
|
|
| 73 |
// do only lucene reader tests |
|
| 74 |
} |
|
| 75 |
|
|
| 76 |
if(testToRun == null) {
|
|
| 77 |
testToRun = new Object[1]; |
|
| 78 |
testToRun[0] = new SearchRepositoryGetQueryTest(IndexerFactory.getIndexReader(getIndexDirectory())); |
|
| 79 |
} |
|
| 80 |
|
|
| 81 |
return testToRun; |
|
| 82 |
|
|
| 83 |
} |
|
| 84 |
|
|
| 58 | 85 |
@AfterClass |
| 59 | 86 |
public void afterClass() throws IOException {
|
| 60 |
IndexReader reader = searchRepository.getReader(); |
|
| 61 |
reader.close(); |
|
| 87 |
indexReader.close(); |
|
| 62 | 88 |
} |
| 63 | 89 |
|
| 64 | 90 |
public void testQueryForFilenameOnly() {
|
| 65 |
TopDocs result = searchRepository.getQueryResult("+filename:f1.txt");
|
|
| 66 |
assertEquals(result.totalHits, 4); |
|
| 91 |
List<ResultEntry> result = indexReader.getResult("+filename:f1.txt");
|
|
| 92 |
assertEquals(result.size(), 4); |
|
| 67 | 93 |
} |
| 68 | 94 |
|
| 69 | 95 |
public void testQueryForFilenameOnlyUppercase() {
|
| 70 |
TopDocs result = searchRepository.getQueryResult("+filename:F1.txt");
|
|
| 71 |
assertEquals(result.totalHits, 4); |
|
| 96 |
List<ResultEntry> result = indexReader.getResult("+filename:F1.txt");
|
|
| 97 |
assertEquals(result.size(), 4); |
|
| 72 | 98 |
} |
| 73 | 99 |
|
| 74 | 100 |
public void testQueryForFilenameMixedCaseTestPPT() {
|
| 75 |
TopDocs result = searchRepository.getQueryResult("+filename:testPPT.*");
|
|
| 76 |
assertEquals(result.totalHits, 2); |
|
| 101 |
List<ResultEntry> result = indexReader.getResult("+filename:testPPT.*");
|
|
| 102 |
assertEquals(result.size(), 2); |
|
| 77 | 103 |
} |
| 78 | 104 |
|
| 79 | 105 |
public void testQueryForFilenameLowercaseTestPPT() {
|
| 80 |
TopDocs result = searchRepository.getQueryResult("+filename:testppt.*");
|
|
| 81 |
assertEquals(result.totalHits, 2); |
|
| 106 |
List<ResultEntry> result = indexReader.getResult("+filename:testppt.*");
|
|
| 107 |
assertEquals(result.size(), 2); |
|
| 82 | 108 |
} |
| 83 | 109 |
|
| 84 | 110 |
public void testQueryForFilenameWithPrefixedWildcardTextFiles() {
|
| 85 |
TopDocs result = searchRepository.getQueryResult("+filename:*.txt");
|
|
| 86 |
assertEquals(result.totalHits, 8); |
|
| 111 |
List<ResultEntry> result = indexReader.getResult("+filename:*.txt");
|
|
| 112 |
assertEquals(result.size(), 8); |
|
| 87 | 113 |
} |
| 88 | 114 |
|
| 89 | 115 |
public void testQueryForFilenameWithPrefixedWildcardExcelFiles() {
|
| 90 |
TopDocs result = searchRepository.getQueryResult("+filename:*.xls");
|
|
| 91 |
assertEquals(result.totalHits, 2); |
|
| 116 |
List<ResultEntry> result = indexReader.getResult("+filename:*.xls");
|
|
| 117 |
assertEquals(result.size(), 2); |
|
| 92 | 118 |
} |
| 93 | 119 |
|
| 94 | 120 |
public void testQueryForFilenameWithPrefixedWildcardExcel2007Files() {
|
| 95 |
TopDocs result = searchRepository.getQueryResult("+filename:*.xlsx");
|
|
| 96 |
assertEquals(result.totalHits, 2); |
|
| 121 |
List<ResultEntry> result = indexReader.getResult("+filename:*.xlsx");
|
|
| 122 |
assertEquals(result.size(), 2); |
|
| 97 | 123 |
} |
| 98 | 124 |
|
| 99 | 125 |
public void testQueryForPathMixedCase() {
|
| 100 |
TopDocs result = searchRepository.getQueryResult("+path:/*/B_*");
|
|
| 101 |
assertEquals(result.totalHits, 6); |
|
| 126 |
List<ResultEntry> result = indexReader.getResult("+path:/*/B_*");
|
|
| 127 |
assertEquals(result.size(), 6); |
|
| 102 | 128 |
} |
| 103 | 129 |
|
| 104 | 130 |
public void testQueryForPathLowerCase() {
|
| 105 |
TopDocs result = searchRepository.getQueryResult("+path:/*/b_*");
|
|
| 106 |
assertEquals(result.totalHits, 6); |
|
| 131 |
List<ResultEntry> result = indexReader.getResult("+path:/*/b_*");
|
|
| 132 |
assertEquals(result.size(), 6); |
|
| 107 | 133 |
} |
| 108 | 134 |
|
| 109 | 135 |
public void testQueryForTermForExcelWorksheet() {
|
| 110 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Sample Excel Worksheet\"");
|
|
| 111 |
assertEquals(result.totalHits, 2); |
|
| 136 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Excel Worksheet\"");
|
|
| 137 |
assertEquals(result.size(), 2); |
|
| 112 | 138 |
} |
| 113 | 139 |
|
| 114 | 140 |
public void testQueryForTermForExcelWorksheetCombination() {
|
| 115 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Sample Excel Worksheet\" +filename:*.xls");
|
|
| 116 |
assertEquals(result.totalHits, 1); |
|
| 141 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Excel Worksheet\" +filename:*.xls");
|
|
| 142 |
assertEquals(result.size(), 1); |
|
| 117 | 143 |
} |
| 118 | 144 |
|
| 119 | 145 |
public void testQueryForTermFromWord() {
|
| 120 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Sample Word\"");
|
|
| 121 |
assertEquals(result.totalHits, 2); |
|
| 146 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Word\"");
|
|
| 147 |
assertEquals(result.size(), 2); |
|
| 122 | 148 |
} |
| 123 | 149 |
|
| 124 | 150 |
public void testQueryForTermFromWordCombination() {
|
| 125 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Sample Word\" +filename:*.doc");
|
|
| 126 |
assertEquals(result.totalHits, 1); |
|
| 151 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Word\" +filename:*.doc");
|
|
| 152 |
assertEquals(result.size(), 1); |
|
| 127 | 153 |
} |
| 128 | 154 |
|
| 129 | 155 |
public void testQueryForTermOfPowerPoint() {
|
| 130 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Sample Powerpoint\"");
|
|
| 131 |
assertEquals(result.totalHits, 2); |
|
| 156 |
List<ResultEntry> result = indexReader.getResult("+content:\"Sample Powerpoint\"");
|
|
| 157 |
assertEquals(result.size(), 2); |
|
| 132 | 158 |
} |
| 133 | 159 |
|
| 134 | 160 |
public void testQueryOpenOfficeODP() {
|
| 135 | 161 |
//Das ist ein Test mit OpenOffice 3.0 auf Windows XP |
| 136 |
TopDocs result = searchRepository.getQueryResult("+contents:\"OpenOffice 3.0 auf Windows XP\"");
|
|
| 137 |
assertEquals(result.totalHits, 1); |
|
| 162 |
List<ResultEntry> result = indexReader.getResult("+content:\"OpenOffice 3.0 auf Windows XP\"");
|
|
| 163 |
assertEquals(result.size(), 1); |
|
| 138 | 164 |
} |
| 139 | 165 |
|
| 140 | 166 |
public void testQueryOpenOfficeODS() {
|
| 141 | 167 |
//Test Mit OpenOffice |
| 142 | 168 |
//3.0 |
| 143 | 169 |
//Windows XP |
| 144 |
TopDocs result = searchRepository.getQueryResult("+contents:\"Test Mit OpenOffice 3.0 Windows XP\"");
|
|
| 145 |
assertEquals(result.totalHits, 1); |
|
| 170 |
List<ResultEntry> result = indexReader.getResult("+content:\"Test Mit OpenOffice 3.0 Windows XP\"");
|
|
| 171 |
assertEquals(result.size(), 1); |
|
| 146 | 172 |
} |
| 147 | 173 |
|
| 148 | 174 |
public void testQueryOpenOfficeODT() {
|
| ... | ... | |
| 150 | 176 |
//In OpenOffice |
| 151 | 177 |
//3.0 |
| 152 | 178 |
//Windows XP |
| 153 |
TopDocs result = searchRepository.getQueryResult("+contents:\"This is a Test In OpenOffice 3.0 Windows XP\"");
|
|
| 154 |
assertEquals(result.totalHits, 1); |
|
| 179 |
List<ResultEntry> result = indexReader.getResult("+content:\"This is a Test In OpenOffice 3.0 Windows XP\"");
|
|
| 180 |
assertEquals(result.size(), 1); |
|
| 155 | 181 |
} |
| 156 | 182 |
|
| 157 | 183 |
public void testQueryArchiveContentsTAR() {
|
| 158 |
TopDocs result = searchRepository.getQueryResult("+contents:\"This file is contined in a archive\"");
|
|
| 159 |
assertEquals(result.totalHits, 1); |
|
| 184 |
List<ResultEntry> result = indexReader.getResult("+content:\"This file is contined in a archive\"");
|
|
| 185 |
assertEquals(result.size(), 1); |
|
| 160 | 186 |
} |
| 161 | 187 |
|
| 162 | 188 |
public void testQueryArchiveContentsZIP() {
|
| 163 |
TopDocs result = searchRepository.getQueryResult("+contents:\"This file is contents of a zip archive\"");
|
|
| 164 |
assertEquals(result.totalHits, 1); |
|
| 189 |
List<ResultEntry> result = indexReader.getResult("+content:\"This file is contents of a zip archive\"");
|
|
| 190 |
assertEquals(result.size(), 1); |
|
| 165 | 191 |
} |
| 166 | 192 |
|
| 167 | 193 |
public void testQueryForTagsOfAllKind() {
|
| 168 |
TopDocs result = searchRepository.getQueryResult("+tag:*");
|
|
| 194 |
List<ResultEntry> result = indexReader.getResult("+tag:*");
|
|
| 169 | 195 |
//This will be 7 entries: 4 which are coming from the maventag entries, |
| 170 | 196 |
//2 from the subversiontag entries and 1 from a plain tag entry. |
| 171 |
assertEquals(result.totalHits, 7); |
|
| 197 |
assertEquals(result.size(), 7); |
|
| 172 | 198 |
} |
| 173 | 199 |
|
| 174 | 200 |
public void testQueryForMavenTags() {
|
| 175 |
TopDocs result = searchRepository.getQueryResult("+maventag:*");
|
|
| 176 |
assertEquals(result.totalHits, 4); |
|
| 201 |
List<ResultEntry> result = indexReader.getResult("+maventag:*");
|
|
| 202 |
assertEquals(result.size(), 4); |
|
| 177 | 203 |
} |
| 178 | 204 |
|
| 179 | 205 |
public void testQueryForTagsOnly() {
|
| 180 |
TopDocs result = searchRepository.getQueryResult("+tag:* -maventag:* -subversiontag:*");
|
|
| 206 |
List<ResultEntry> result = indexReader.getResult("+tag:* -maventag:* -subversiontag:*");
|
|
| 181 | 207 |
//This has to be the result of the tags only. |
| 182 |
assertEquals(result.totalHits, 1); |
|
| 208 |
assertEquals(result.size(), 1); |
|
| 183 | 209 |
} |
| 184 | 210 |
|
| 185 | 211 |
public void testQueryForSubversionTagsOnly() {
|
| 186 |
TopDocs result = searchRepository.getQueryResult("+subversiontag:*");
|
|
| 212 |
List<ResultEntry> result = indexReader.getResult("+subversiontag:*");
|
|
| 187 | 213 |
//This has to result in two entries for the Subversion tags. |
| 188 |
assertEquals(result.totalHits, 2); |
|
| 214 |
assertEquals(result.size(), 2); |
|
| 189 | 215 |
} |
| 190 | 216 |
|
| 191 | 217 |
public void testQueryForBranchPath() {
|
| 192 |
TopDocs result = searchRepository.getQueryResult("+path:*/branches/*");
|
|
| 193 |
assertEquals(result.totalHits, 7); |
|
| 218 |
List<ResultEntry> result = indexReader.getResult("+path:*/branches/*");
|
|
| 219 |
assertEquals(result.size(), 7); |
|
| 194 | 220 |
} |
| 195 | 221 |
|
| 196 | 222 |
public void testQueryForBranches() {
|
| 197 |
TopDocs result = searchRepository.getQueryResult("+branch:*");
|
|
| 223 |
List<ResultEntry> result = indexReader.getResult("+branch:*");
|
|
| 198 | 224 |
//We have only a single entry here |
| 199 |
assertEquals(result.totalHits, 1); |
|
| 225 |
assertEquals(result.size(), 1); |
|
| 200 | 226 |
} |
| 201 | 227 |
|
| 202 | 228 |
public void testQueryForKind() {
|
| 203 |
TopDocs result = searchRepository.getQueryResult("+kind:D");
|
|
| 229 |
List<ResultEntry> result = indexReader.getResult("+kind:D");
|
|
| 204 | 230 |
//We expect three entries here |
| 205 |
assertEquals(result.totalHits, 3); |
|
| 231 |
assertEquals(result.size(), 3); |
|
| 206 | 232 |
} |
| 207 | 233 |
|
| 208 | 234 |
public void testQueryForNode() {
|
| 209 |
TopDocs result = searchRepository.getQueryResult("+node:dir");
|
|
| 235 |
List<ResultEntry> result = indexReader.getResult("+node:dir");
|
|
| 210 | 236 |
//We expect twelve entries here |
| 211 |
assertEquals(result.totalHits, 12); |
|
| 237 |
assertEquals(result.size(), 12); |
|
| 212 | 238 |
} |
| 213 | 239 |
|
| 214 |
private Field searchForField (Document hit, String name) {
|
|
| 215 |
Field result = null; |
|
| 216 |
List<Field> fieldList = hit.getFields(); |
|
| 217 |
for (Field field : fieldList) {
|
|
| 218 |
if (field.name().equals(name)) {
|
|
| 219 |
result = field; |
|
| 220 |
} |
|
| 221 |
} |
|
| 222 |
return result; |
|
| 223 |
} |
|
| 224 |
|
|
| 225 | 240 |
public void testQueryForDeletedTag() throws CorruptIndexException, IOException {
|
| 226 |
TopDocs result = searchRepository.getQueryResult("+path:*/tags/* +kind:d");
|
|
| 227 |
assertEquals(result.totalHits, 1); |
|
| 241 |
List<ResultEntry> result = indexReader.getResult("+path:*/tags/* +kind:d");
|
|
| 242 |
assertEquals(result.size(), 1); |
|
| 228 | 243 |
|
| 229 |
Document hit = searchRepository.getSearcher().doc(result.scoreDocs[0].doc); |
|
| 230 |
List<Field> fieldList = hit.getFields(); |
|
| 244 |
ResultEntry doc = result.get(0); |
|
| 231 | 245 |
|
| 232 | 246 |
//This entry is not allowed to have a filename entry!!! |
| 233 |
Field fileNameField = searchForField(hit, FieldNames.FILENAME.getValue()); |
|
| 234 |
assertNotNull(fileNameField, "We have expected to find the " + FieldNames.FILENAME + " field."); |
|
| 235 |
assertEquals(fileNameField.stringValue().length(), 0, "We have expected to get an empty filename field for a tag which is a directory."); |
|
| 247 |
assertNotNull(doc.getFilename(), "We have expected to find the " + FieldNames.FILENAME + " field."); |
|
| 248 |
assertEquals(doc.getFilename().length(), 0, "We have expected to get an empty filename field for a tag which is a directory."); |
|
| 236 | 249 |
|
| 237 |
Field pathField = searchForField(hit, FieldNames.PATH.getValue()); |
|
| 238 |
assertNotNull(pathField, "We have expected to find the " + FieldNames.PATH + " field."); |
|
| 239 |
assertEquals(pathField.stringValue(), "/project1/tags/RELEASE-0.0.1/", "We have expected to get an particular path value"); |
|
| 250 |
assertNotNull(doc.getPath(), "We have expected to find the " + FieldNames.PATH + " field."); |
|
| 251 |
assertEquals(doc.getPath(), "/project1/tags/RELEASE-0.0.1/", "We have expected to get an particular path value"); |
|
| 240 | 252 |
} |
| 241 | 253 |
|
| 242 | 254 |
} |
| src/test/java/com/soebes/supose/TestBase.java (Arbeitskopie) | ||
|---|---|---|
| 26 | 26 |
package com.soebes.supose; |
| 27 | 27 |
|
| 28 | 28 |
import java.io.File; |
| 29 |
import java.io.UnsupportedEncodingException; |
|
| 29 | 30 |
import java.net.URL; |
| 31 |
import java.net.URLDecoder; |
|
| 30 | 32 |
|
| 31 | 33 |
/** |
| 32 | 34 |
* @author Karl Heinz Marbaise |
| ... | ... | |
| 45 | 47 |
* |
| 46 | 48 |
* @param name |
| 47 | 49 |
* @return |
| 50 |
* @throws UnsupportedEncodingException |
|
| 48 | 51 |
*/ |
| 49 | 52 |
public String getFileResource(String name) {
|
| 50 | 53 |
URL url = this.getClass().getResource(name); |
| 51 | 54 |
if (url != null) {
|
| 55 |
try {
|
|
| 56 |
return URLDecoder.decode(url.getFile(), "UTF-8"); |
|
| 57 |
} catch (UnsupportedEncodingException e) { }
|
|
| 52 | 58 |
return url.getFile(); |
| 53 | 59 |
} else {
|
| 54 | 60 |
//We have a file which does not exist |
| src/test/java/com/soebes/supose/lucene/LuceneTest.java (Arbeitskopie) | ||
|---|---|---|
| 78 | 78 |
Document doc = new Document(); |
| 79 | 79 |
String text = "This is the text to be indexed."; |
| 80 | 80 |
addUnTokenizedField(doc, FieldNames.REVISION.getValue(), NumberUtils.pad(1)); |
| 81 |
addTokenizedField(doc, FieldNames.CONTENTS.getValue(), text); |
|
| 81 |
addTokenizedField(doc, FieldNames.CONTENT.getValue(), text); |
|
| 82 | 82 |
addUnTokenizedField(doc, FieldNames.FILENAME.getValue(), "/trunk/doc/testXML.doc"); |
| 83 | 83 |
iwriter.addDocument(doc); |
| 84 | 84 |
|
| 85 | 85 |
doc = new Document(); |
| 86 | 86 |
text = "This is different text."; |
| 87 | 87 |
addUnTokenizedField(doc, FieldNames.REVISION.getValue(), NumberUtils.pad(2)); |
| 88 |
addTokenizedField(doc, FieldNames.CONTENTS.getValue(), text); |
|
| 88 |
addTokenizedField(doc, FieldNames.CONTENT.getValue(), text); |
|
| 89 | 89 |
addUnTokenizedField(doc, FieldNames.FILENAME.getValue(), "/tags/docs/XYZabc.java"); |
| 90 | 90 |
iwriter.addDocument(doc); |
| 91 | 91 |
|
| 92 | 92 |
doc = new Document(); |
| 93 | 93 |
text = "This is more different text."; |
| 94 | 94 |
addUnTokenizedField(doc, FieldNames.REVISION.getValue(), NumberUtils.pad(3)); |
| 95 |
addTokenizedField(doc, FieldNames.CONTENTS.getValue(), text); |
|
| 95 |
addTokenizedField(doc, FieldNames.CONTENT.getValue(), text); |
|
| 96 | 96 |
addUnTokenizedField(doc, FieldNames.FILENAME.getValue(), "/tags/docs/SCMPlan.doc"); |
| 97 | 97 |
iwriter.addDocument(doc); |
| 98 | 98 |
|
| 99 | 99 |
doc = new Document(); |
| 100 | 100 |
text = "This is the third text."; |
| 101 | 101 |
addUnTokenizedField(doc, FieldNames.REVISION.getValue(), NumberUtils.pad(4)); |
| 102 |
addTokenizedField(doc, FieldNames.CONTENTS.getValue(), text); |
|
| 102 |
addTokenizedField(doc, FieldNames.CONTENT.getValue(), text); |
|
| 103 | 103 |
addUnTokenizedField(doc, FieldNames.FILENAME.getValue(), "/trunk/subdir/elviraXML.doc"); |
| 104 | 104 |
iwriter.addDocument(doc); |
| 105 | 105 |
|
| ... | ... | |
| 133 | 133 |
public void testSingleAsterik() throws ParseException, IOException {
|
| 134 | 134 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 135 | 135 |
// Parse a simple query that searches for "text": |
| 136 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 136 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 137 | 137 |
Query query = parser.parse("+filename:/*.doc");
|
| 138 | 138 |
TopDocs result = isearcher.search(query, null, 10); |
| 139 | 139 |
printOut(query, "testSingleAsterik", result); |
| ... | ... | |
| 145 | 145 |
public void testSingleAsterikWithPrefix() throws ParseException, IOException {
|
| 146 | 146 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 147 | 147 |
// Parse a simple query that searches for "text": |
| 148 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 148 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 149 | 149 |
Query query = parser.parse("+filename:/trunk/*.doc");
|
| 150 | 150 |
TopDocs result = isearcher.search(query, null, 10); |
| 151 | 151 |
// Iterate through the results: |
| ... | ... | |
| 157 | 157 |
public void testMultipleAsterik() throws ParseException, IOException {
|
| 158 | 158 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 159 | 159 |
// Parse a simple query that searches for "text": |
| 160 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 160 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 161 | 161 |
Query query = parser.parse("+filename:/*te*.doc");
|
| 162 | 162 |
TopDocs result = isearcher.search(query, null, 10); |
| 163 | 163 |
printOut(query, "testMultipleAsterik", result); |
| ... | ... | |
| 168 | 168 |
public void testMultipleAsterikUppercase() throws ParseException, IOException {
|
| 169 | 169 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 170 | 170 |
// Parse a simple query that searches for "text": |
| 171 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 171 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 172 | 172 |
parser.setLowercaseExpandedTerms(false); |
| 173 | 173 |
Query query = parser.parse("+filename:/*SCM*.doc");
|
| 174 | 174 |
TopDocs result = isearcher.search(query, null, 10); |
| ... | ... | |
| 180 | 180 |
public void testMultipleAsterikLowerCase() throws ParseException, IOException {
|
| 181 | 181 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 182 | 182 |
// Parse a simple query that searches for "text": |
| 183 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 183 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 184 | 184 |
parser.setLowercaseExpandedTerms(false); |
| 185 | 185 |
Query query = parser.parse("+filename:/*scm*.doc");
|
| 186 | 186 |
TopDocs result = isearcher.search(query, null, 10); |
| ... | ... | |
| 192 | 192 |
public void testSingleAsterikRestrictionToRevisionRange() throws ParseException, IOException {
|
| 193 | 193 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 194 | 194 |
// Parse a simple query that searches for "text": |
| 195 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 195 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 196 | 196 |
parser.setLowercaseExpandedTerms(true); |
| 197 | 197 |
Query query = parser.parse("+filename:/*.doc +revision:[1 TO 3]");
|
| 198 | 198 |
TopDocs result = isearcher.search(query, null, 10); |
| ... | ... | |
| 203 | 203 |
public void testSingleAsterikRestrictionToDifferentRevisionRange() throws ParseException, IOException {
|
| 204 | 204 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 205 | 205 |
// Parse a simple query that searches for "text": |
| 206 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 206 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 207 | 207 |
parser.setLowercaseExpandedTerms(true); |
| 208 | 208 |
Query query = parser.parse("+filename:/*.doc +revision:[1 TO 2]");
|
| 209 | 209 |
TopDocs result = isearcher.search(query, null, 10); |
| ... | ... | |
| 215 | 215 |
public void testSingleRevision() throws ParseException, IOException {
|
| 216 | 216 |
Analyzer analyzer = AnalyzerFactory.createInstance(); |
| 217 | 217 |
// Parse a simple query that searches for "text": |
| 218 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS, analyzer); |
|
| 218 |
QueryParser parser = new CustomQueryParser(FieldNames.CONTENT, analyzer); |
|
| 219 | 219 |
parser.setLowercaseExpandedTerms(true); |
| 220 | 220 |
Query query = parser.parse("+revision:1");
|
| 221 | 221 |
TopDocs result = isearcher.search(query, null, 10); |
| src/test/resources/fileextension.properties (Arbeitskopie) | ||
|---|---|---|
| 1 |
#txt = com.soebes.supose.scan.ScanTxtDocument |
|
| 2 |
doc = com.soebes.supose.scan.document.ScanWordDocument |
|
| 3 |
docx = com.soebes.supose.scan.document.ScanWordDocument |
|
| 4 |
xls = com.soebes.supose.scan.document.ScanExcelDocument |
|
| 5 |
xlsx = com.soebes.supose.scan.document.ScanExcelDocument |
|
| 6 |
ppt = com.soebes.supose.scan.document.ScanPowerPointDocument |
|
| 7 |
pptx = com.soebes.supose.scan.document.ScanPowerPointDocument |
|
| 8 |
pdf = com.soebes.supose.scan.document.ScanPDFDocument |
|
| 9 |
java = com.soebes.supose.scan.document.ScanJavaDocument |
|
| 10 |
xml = com.soebes.supose.scan.document.ScanXMLDocument |
|
| 11 |
html = com.soebes.supose.scan.document.ScanHTMLDocument |
|
| 12 |
rtf = com.soebes.supose.scan.document.ScanRTFDocument |
|
| 13 |
# Open Office Formats |
|
| 14 |
odp = com.soebes.supose.scan.document.ScanODPDocument |
|
| 15 |
ods = com.soebes.supose.scan.document.ScanODSDocument |
|
| 16 |
odt = com.soebes.supose.scan.document.ScanODTDocument |
|
| 17 |
# |
|
| 18 |
# =========================================================== |
|
| 19 |
# FOR THE UNIT TEST WE WILL SCAN THE CONTENTS OF THE ARCHIVES |
|
| 20 |
# =========================================================== |
|
| 21 |
# |
|
| 22 |
# If you like to scan archives with their contents |
|
| 23 |
# just uncomment the following parts instead using |
|
| 24 |
# them with ScanArchiveDocument. |
|
| 25 |
# But be aware of that the scanning process will need |
|
| 26 |
# much more time than scanning only the file names |
|
| 27 |
# inside the archives. |
|
| 28 |
zip = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 29 |
jar = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 30 |
tar = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 31 |
tar.gz = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 32 |
tar.bz2 = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 33 |
tgz = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 34 |
tbz2 = com.soebes.supose.scan.document.ScanArchiveWithContentDocument |
|
| 35 |
# |
|
| 36 |
# This section will configure scanning of archives |
|
| 37 |
# in the way that only the filenames inside the archives |
|
| 38 |
# will be extracted and stored as contents of an archive. |
|
| 39 |
# But no contents of them will be stored. |
|
| 40 |
# |
|
| 41 |
#zip = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 42 |
#jar = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 43 |
#tar = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 44 |
#tar.gz = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 45 |
#tar.bz2 = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 46 |
#tgz = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 47 |
#tbz2 = com.soebes.supose.scan.document.ScanArchiveDocument |
|
| 48 |
# |
|
| 49 |
# The following two entries will be removed if |
|
| 50 |
# we use a C parser |
|
| 51 |
c = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 52 |
h = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 53 |
# GNU M4 |
|
| 54 |
m4 = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 55 |
# Usual text files. |
|
| 56 |
txt = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 57 |
# Translation files for GNU getText() |
|
| 58 |
po = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 59 |
# Texinfo Files |
|
| 60 |
texi = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 61 |
txi = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 62 |
texinfo = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 63 |
# |
|
| 64 |
classpath = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 65 |
project = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 66 |
ini = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 67 |
properties = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 68 |
# Usually from Maven projects |
|
| 69 |
apt = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 70 |
# DOS Batch files |
|
| 71 |
bat = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 72 |
cmd = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 73 |
# |
|
| 74 |
# Maybe we will have an SQL parser some time... |
|
| 75 |
sql = com.soebes.supose.scan.document.ScanDefaultDocument |
|
| 1 |
java = com.soebes.supose.parse.JavaDocumentParser |
|
| 2 |
php = com.soebes.supose.parse.PHPDocumentParser |
|
| 3 |
|
|
| 4 |
# everything else goes to Tika |
|
| src/main/java/com/soebes/supose/scan/ScanRepository.java (Arbeitskopie) | ||
|---|---|---|
| 26 | 26 |
package com.soebes.supose.scan; |
| 27 | 27 |
|
| 28 | 28 |
import java.io.IOException; |
| 29 |
import java.text.SimpleDateFormat; |
|
| 30 | 29 |
import java.util.ArrayList; |
| 31 | 30 |
import java.util.Collection; |
| 32 |
import java.util.Date; |
|
| 33 | 31 |
import java.util.Iterator; |
| 34 | 32 |
import java.util.Set; |
| 35 | 33 |
|
| 36 | 34 |
import org.apache.log4j.Logger; |
| 37 |
import org.apache.lucene.document.Document; |
|
| 38 |
import org.apache.lucene.document.Field; |
|
| 39 |
import org.apache.lucene.index.IndexWriter; |
|
| 40 | 35 |
import org.tmatesoft.svn.core.ISVNLogEntryHandler; |
| 41 | 36 |
import org.tmatesoft.svn.core.SVNAuthenticationException; |
| 42 | 37 |
import org.tmatesoft.svn.core.SVNDirEntry; |
| ... | ... | |
| 46 | 41 |
import org.tmatesoft.svn.core.SVNNodeKind; |
| 47 | 42 |
import org.tmatesoft.svn.core.SVNProperties; |
| 48 | 43 |
|
| 49 |
import com.soebes.supose.FieldNames; |
|
| 44 |
import com.soebes.supose.index.Writer; |
|
| 45 |
import com.soebes.supose.parse.DocumentParser; |
|
| 46 |
import com.soebes.supose.parse.IParse; |
|
| 50 | 47 |
import com.soebes.supose.recognition.TagBranch; |
| 51 | 48 |
import com.soebes.supose.recognition.TagBranchRecognition; |
| 52 | 49 |
import com.soebes.supose.repository.Repository; |
| 53 |
import com.soebes.supose.search.NumberUtils; |
|
| 50 |
import com.soebes.supose.search.ResultEntry; |
|
| 54 | 51 |
import com.soebes.supose.utility.FileName; |
| 55 | 52 |
|
| 56 | 53 |
/** |
| ... | ... | |
| 96 | 93 |
* This method will do the real scanning of the whole repository. |
| 97 | 94 |
* It will extract all log entries as first step and go on with |
| 98 | 95 |
* scanning every change set. |
| 99 |
* @param writer The index where the result of the scanning |
|
| 96 |
* @param indexWriter The index where the result of the scanning |
|
| 100 | 97 |
* will be written to. |
| 101 | 98 |
* @throws SVNException |
| 102 | 99 |
*/ |
| 103 | 100 |
@SuppressWarnings("unchecked")
|
| 104 |
public void scan(IndexWriter writer) throws SVNException {
|
|
| 101 |
public void scan(Writer indexWriter) throws SVNException {
|
|
| 105 | 102 |
|
| 106 | 103 |
LOGGER.debug("Repositories latest Revision: " + endRevision);
|
| 107 | 104 |
readLogEntries(); |
| ... | ... | |
| 125 | 122 |
LOGGER.debug("changed paths:");
|
| 126 | 123 |
try {
|
| 127 | 124 |
scanBeginRevision(count, logEntry.getRevision(), logEntry.getChangedPaths().size()); |
| 128 |
workOnChangeSet(writer, logEntry); |
|
| 125 |
workOnChangeSet(indexWriter, logEntry); |
|
| 129 | 126 |
} catch (Exception e) {
|
| 130 | 127 |
LOGGER.error("Error during workOnChangeSet() ", e);
|
| 131 | 128 |
} finally {
|
| ... | ... | |
| 179 | 176 |
* @param indexWriter |
| 180 | 177 |
* @param logEntry |
| 181 | 178 |
*/ |
| 182 |
private void workOnChangeSet(IndexWriter indexWriter, SVNLogEntry logEntry) {
|
|
| 183 |
Set changedPathsSet = logEntry.getChangedPaths().keySet(); |
|
| 179 |
private void workOnChangeSet(Writer indexWriter, SVNLogEntry logEntry) {
|
|
| 180 |
Set<?> changedPathsSet = logEntry.getChangedPaths().keySet(); |
|
| 184 | 181 |
|
| 185 | 182 |
TagBranchRecognition tbr = new TagBranchRecognition(getRepository()); |
| 186 | 183 |
|
| ... | ... | |
| 200 | 197 |
} |
| 201 | 198 |
|
| 202 | 199 |
startIndexChangeSet(); |
| 203 |
for (Iterator changedPaths = changedPathsSet.iterator(); changedPaths.hasNext();) {
|
|
| 200 |
for (Iterator<?> changedPaths = changedPathsSet.iterator(); changedPaths.hasNext();) {
|
|
| 201 |
|
|
| 202 |
ResultEntry entry = new ResultEntry(); |
|
| 203 |
|
|
| 204 |
addTagBranchToDoc(res, entry); |
|
| 204 | 205 |
|
| 205 |
Document doc = new Document(); |
|
| 206 |
addTagBranchToDoc(res, doc); |
|
| 207 |
|
|
| 208 | 206 |
//It is needed to check it in every entry |
| 209 | 207 |
//This will result in making entries for every record of the ChangeSet. |
| 210 | 208 |
SVNLogEntryPath entryPath = (SVNLogEntryPath) logEntry.getChangedPaths().get(changedPaths.next()); |
| ... | ... | |
| 223 | 221 |
|
| 224 | 222 |
try {
|
| 225 | 223 |
beginIndexChangeSetItem(dirEntry); |
| 226 |
indexFile(doc, indexWriter, dirEntry, logEntry, entryPath); |
|
| 224 |
indexFile(entry, indexWriter, dirEntry, logEntry, entryPath); |
|
| 227 | 225 |
} catch (IOException e) {
|
| 228 | 226 |
LOGGER.error("IOExcepiton: ", e);
|
| 229 | 227 |
} catch (SVNException e) {
|
| ... | ... | |
| 237 | 235 |
stopIndexChangeSet(); |
| 238 | 236 |
} |
| 239 | 237 |
|
| 240 |
private void addTagBranchToDoc(TagBranch res, Document doc) {
|
|
| 238 |
private void addTagBranchToDoc(TagBranch res, ResultEntry entry) {
|
|
| 241 | 239 |
if (res != null) {
|
| 242 | 240 |
switch (res.getType()) {
|
| 243 | 241 |
case BRANCH: |
| 244 |
addUnTokenizedField(doc, FieldNames.BRANCH, res.getName()); |
|
| 242 |
entry.setBranch(res.getName()); |
|
| 245 | 243 |
break; |
| 246 | 244 |
case TAG: |
| 247 |
addUnTokenizedField(doc, FieldNames.TAG, res.getName()); |
|
| 245 |
entry.setTag(res.getName()); |
|
| 248 | 246 |
switch(res.getTagType()) {
|
| 249 | 247 |
case NONE: |
| 250 | 248 |
break; |
| 251 | 249 |
case TAG: //We already have it marked as Tag. |
| 252 | 250 |
break; |
| 253 | 251 |
case MAVENTAG: |
| 254 |
addUnTokenizedField(doc, FieldNames.MAVENTAG, res.getName()); |
|
| 252 |
entry.setMavenTag(res.getName()); |
|
| 255 | 253 |
break; |
| 256 | 254 |
case SUBVERSIONTAG: |
| 257 |
addUnTokenizedField(doc, FieldNames.SUBVERSIONTAG, res.getName()); |
|
| 255 |
entry.setSubversionTag(res.getName()); |
|
| 258 | 256 |
break; |
| 259 | 257 |
} |
| 260 | 258 |
break; |
| ... | ... | |
| 263 | 261 |
} |
| 264 | 262 |
} |
| 265 | 263 |
} |
| 266 |
|
|
| 267 |
protected void addTokenizedField(Document doc, FieldNames fieldName, String value) {
|
|
| 268 |
doc.add(new Field(fieldName.getValue(), value, Field.Store.YES, Field.Index.ANALYZED)); |
|
| 269 |
} |
|
| 270 |
protected void addTokenizedField(Document doc, String fieldName, String value) {
|
|
| 271 |
doc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.ANALYZED)); |
|
| 272 |
} |
|
| 273 |
private void addUnTokenizedField(Document doc, FieldNames fieldName, String value) {
|
|
| 274 |
doc.add(new Field(fieldName.getValue(), value, Field.Store.YES, Field.Index.NOT_ANALYZED)); |
|
| 275 |
} |
|
| 276 |
private void addUnTokenizedFieldNoStore(Document doc, FieldNames fieldName, String value) {
|
|
| 277 |
doc.add(new Field(fieldName.getValue(), value, Field.Store.NO, Field.Index.NOT_ANALYZED)); |
|
| 278 |
} |
|
| 279 |
private void addUnTokenizedFieldNoStore(Document doc, String fieldName, String value) {
|
|
| 280 |
doc.add(new Field(fieldName, value, Field.Store.NO, Field.Index.NOT_ANALYZED)); |
|
| 281 |
} |
|
| 282 |
private void addUnTokenizedField(Document doc, String fieldName, String value) {
|
|
| 283 |
doc.add(new Field(fieldName, value, Field.Store.YES, Field.Index.NOT_ANALYZED)); |
|
| 284 |
} |
|
| 285 |
private void addUnTokenizedField(Document doc, FieldNames fieldName, Long value) {
|
|
| 286 |
doc.add(new Field(fieldName.getValue(), value.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
|
| 287 |
} |
|
| 288 |
private void addUnTokenizedField(Document doc, FieldNames fieldName, Date value) {
|
|
| 289 |
SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy hh:mm:ss.SSS");
|
|
| 290 |
doc.add(new Field(fieldName.getValue(), sdf.format(value), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
|
| 291 |
} |
|
| 292 |
|
|
| 264 |
|
|
| 293 | 265 |
/** |
| 294 | 266 |
* The method will index a particular document (file) into the Lucene index. |
| 295 | 267 |
* It will store the majority of the information about a file into the Lucene index like |
| ... | ... | |
| 304 | 276 |
* @throws SVNException |
| 305 | 277 |
* @throws IOException |
| 306 | 278 |
*/ |
| 307 |
private void indexFile(Document doc, IndexWriter indexWriter, SVNDirEntry dirEntry, SVNLogEntry logEntry, SVNLogEntryPath entryPath) |
|
| 308 |
throws SVNException, IOException {
|
|
| 309 |
SVNProperties fileProperties = new SVNProperties(); |
|
| 279 |
private void indexFile(ResultEntry entry, Writer indexWriter, SVNDirEntry dirEntry, SVNLogEntry logEntry, SVNLogEntryPath entryPath) |
|
| 280 |
throws SVNException, IOException {
|
|
| 281 |
SVNProperties fileProperties = new SVNProperties(); |
|
| 310 | 282 |
|
| 311 |
SVNNodeKind nodeKind = null; |
|
| 312 |
//if the entry has been deleted we will check the information about the entry |
|
| 313 |
//via the revision before... |
|
| 314 |
LOGGER.debug("Before checking...");
|
|
| 315 |
nodeKind = repository.getRepository().checkPath(entryPath.getPath(), logEntry.getRevision()); |
|
| 316 |
LOGGER.debug("After checking...");
|
|
| 283 |
SVNNodeKind nodeKind = null; |
|
| 284 |
//if the entry has been deleted we will check the information about the entry |
|
| 285 |
//via the revision before... |
|
| 286 |
LOGGER.debug("Before checking...");
|
|
| 287 |
nodeKind = repository.getRepository().checkPath(entryPath.getPath(), logEntry.getRevision()); |
|
| 288 |
LOGGER.debug("After checking...");
|
|
| 317 | 289 |
|
| 318 |
addUnTokenizedField(doc, FieldNames.REVISION, NumberUtils.pad(logEntry.getRevision())); |
|
| 290 |
entry.setRevision(logEntry.getRevision()); |
|
| 319 | 291 |
|
| 320 |
boolean isDir = nodeKind == SVNNodeKind.DIR; |
|
| 321 |
boolean isFile = nodeKind == SVNNodeKind.FILE; |
|
| 322 |
FileName fileName = null; |
|
| 323 |
if (isDir) {
|
|
| 324 |
LOGGER.debug("The " + entryPath.getPath() + " is a directory entry.");
|
|
| 325 |
addUnTokenizedField(doc, FieldNames.NODE, "dir"); |
|
| 326 |
fileName = new FileName(entryPath.getPath(), true); |
|
| 327 |
} else if (isFile) {
|
|
| 328 |
LOGGER.debug("The " + entryPath.getPath() + " is a file entry.");
|
|
| 329 |
addUnTokenizedField(doc, FieldNames.NODE, "file"); |
|
| 330 |
fileName = new FileName(entryPath.getPath(), false); |
|
| 331 |
} else {
|
|
| 332 |
//This means a file/directory has been deleted. |
|
| 333 |
addUnTokenizedField(doc, FieldNames.NODE, "unknown"); |
|
| 334 |
LOGGER.debug("The " + entryPath.getPath() + " is an unknown entry.");
|
|
| 292 |
boolean isDir = nodeKind == SVNNodeKind.DIR; |
|
| 293 |
boolean isFile = nodeKind == SVNNodeKind.FILE; |
|
| 294 |
FileName fileName = null; |
|
| 295 |
if (isDir) {
|
|
| 296 |
LOGGER.debug("The " + entryPath.getPath() + " is a directory entry.");
|
|
| 297 |
entry.setNode("dir");
|
|
| 298 |
fileName = new FileName(entryPath.getPath(), true); |
|
| 299 |
} else if (isFile) {
|
|
| 300 |
LOGGER.debug("The " + entryPath.getPath() + " is a file entry.");
|
|
| 301 |
entry.setNode("file");
|
|
| 302 |
fileName = new FileName(entryPath.getPath(), false); |
|
| 303 |
} else {
|
|
| 304 |
//This means a file/directory has been deleted. |
|
| 305 |
entry.setNode("unknown");
|
|
| 306 |
LOGGER.debug("The " + entryPath.getPath() + " is an unknown entry.");
|
|
| 335 | 307 |
|
| 336 |
//We would like to know what is has been? |
|
| 337 |
//Directory? File? So we go a step back in History... |
|
| 338 |
long rev = logEntry.getRevision() - 1; |
|
| 339 |
SVNNodeKind nodeKindUnknown = getRepository().getRepository().checkPath(entryPath.getPath(), rev); |
|
| 340 |
LOGGER.debug("NodeKind(" + rev + "): " + nodeKindUnknown.toString());
|
|
| 341 |
fileName = new FileName(entryPath.getPath(), nodeKindUnknown == SVNNodeKind.DIR); |
|
| 342 |
} |
|
| 308 |
//We would like to know what it has been. |
|
| 309 |
//Directory? File? So we go a step back in History... |
|
| 310 |
long rev = logEntry.getRevision() - 1; |
|
| 311 |
SVNNodeKind nodeKindUnknown = getRepository().getRepository().checkPath(entryPath.getPath(), rev); |
|
| 312 |
LOGGER.debug("NodeKind(" + rev + "): " + nodeKindUnknown.toString());
|
|
| 313 |
fileName = new FileName(entryPath.getPath(), nodeKindUnknown == SVNNodeKind.DIR); |
|
| 314 |
} |
|
| 343 | 315 |
|
| 344 |
if (LOGGER.isDebugEnabled()) {
|
|
| 345 |
LOGGER.debug("FileNameCheck: entryPath -> kind:" + nodeKind.toString() + " path:" + entryPath.getPath());
|
|
| 346 |
LOGGER.debug("FileNameCheck: path:'" + fileName.getPath() + "' filename:'" + fileName.getBaseName() + "'");
|
|
| 347 |
} |
|
| 316 |
if (LOGGER.isDebugEnabled()) {
|
|
| 317 |
LOGGER.debug("FileNameCheck: entryPath -> kind:" + nodeKind.toString() + " path:" + entryPath.getPath());
|
|
| 318 |
LOGGER.debug("FileNameCheck: path:'" + fileName.getPath() + "' filename:'" + fileName.getBaseName() + "'");
|
|
| 319 |
} |
|
| 348 | 320 |
|
| 349 |
//TODO: We have to check if we need to set localization |
|
| 350 |
addUnTokenizedFieldNoStore(doc, FieldNames.PATH, fileName.getPath().toLowerCase()); |
|
| 351 |
addUnTokenizedField(doc, FieldNames.PATH, fileName.getPath()); |
|
| 321 |
//TODO: We have to check if we need to set localization |
|
| 322 |
entry.setPath(fileName.getPath()); |
|
| 323 |
|
|
| 324 |
//Did a copy operation take place? |
|
| 325 |
if (entryPath.getCopyPath() != null) {
|
|
| 326 |
entry.setFrom(entryPath.getCopyPath()); |
|
| 327 |
entry.setFromRev(entryPath.getCopyRevision()); |
|
| 328 |
} |
|
| 352 | 329 |
|
| 353 |
//Does a copy operation took place... |
|
| 354 |
if (entryPath.getCopyPath() != null) {
|
|
| 355 |
addUnTokenizedField(doc, FieldNames.FROM, entryPath.getCopyPath()); |
|
| 356 |
addUnTokenizedField(doc, FieldNames.FROMREV, entryPath.getCopyRevision()); |
|
| 357 |
} |
|
| 330 |
//The field we use for searching is stored as lowercase. |
|
| 331 |
//TODO: We have to check if we need to set localization |
|
| 332 |
entry.setFilename(fileName.getBaseName()); |
|
| 333 |
|
|
| 334 |
entry.setAuthor(logEntry.getAuthor() == null ? "" : logEntry.getAuthor()); |
|
| 335 |
|
|
| 336 |
//We will add the message as tokenized field to be able to search within the log messages. |
|
| 337 |
entry.setMessage(logEntry.getMessage() == null ? "" : logEntry.getMessage()); |
|
| 358 | 338 |
|
| 359 |
//The field we use for searching is stored as lowercase. |
|
| 360 |
//TODO: We have to check if we need to set localization |
|
| 361 |
addUnTokenizedFieldNoStore(doc, FieldNames.FILENAME, fileName.getBaseName().toLowerCase()); |
|
| 362 |
addUnTokenizedField(doc, FieldNames.FILENAME, fileName.getBaseName()); |
|
| 339 |
entry.setDate(logEntry.getDate()); |
|
| 363 | 340 |
|
| 364 |
addUnTokenizedField(doc, FieldNames.AUTHOR, logEntry.getAuthor() == null ? "" : logEntry.getAuthor()); |
|
| 341 |
entry.setKind(String.valueOf(entryPath.getType()).toLowerCase()); |
|
| 365 | 342 |
|
| 366 |
//We will add the message as tokenized field to be able to search within the log messages. |
|
| 367 |
addTokenizedField(doc, FieldNames.MESSAGE, logEntry.getMessage() == null ? "" : logEntry.getMessage()); |
|
| 368 |
addUnTokenizedField(doc, FieldNames.DATE, logEntry.getDate()); |
|
| 343 |
//TODO: Maybe we don't need this if we use the repository name? |
|
| 344 |
entry.setRepositoryUUID(getRepository().getRepository().getRepositoryUUID(false)); |
|
| 345 |
|
|
| 346 |
entry.setRepository(getName()); |
|
| 369 | 347 |
|
| 370 |
addUnTokenizedField(doc, FieldNames.KIND, String.valueOf(entryPath.getType()).toLowerCase()); |
|
| 348 |
if (nodeKind == SVNNodeKind.NONE) {
|
|
| 349 |
LOGGER.debug("The " + entryPath.getPath() + " is a NONE entry.");
|
|
| 350 |
} else if (nodeKind == SVNNodeKind.DIR) {
|
|
| 351 |
//The given entry is a directory. |
|
| 352 |
LOGGER.debug("The " + entryPath.getPath() + " is a directory.");
|
|
| 353 |
//Here we need to call getDir to get directory properties. |
|
| 354 |
Collection<SVNDirEntry> dirEntries = null; |
|
| 355 |
getRepository().getRepository().getDir(entryPath.getPath(), logEntry.getRevision(), fileProperties, dirEntries); |
|
| 356 |
//indexProperties(fileProperties, doc); |
|
| 357 |
indexProperties(fileProperties, entry); |
|
| 371 | 358 |
|
| 372 |
//TODO: May be don't need this if we use repository name? |
|
| 373 |
addUnTokenizedField(doc, FieldNames.REPOSITORYUUID, getRepository().getRepository().getRepositoryUUID(false)); |
|
| 359 |
} else if (nodeKind == SVNNodeKind.FILE) {
|
|
| 374 | 360 |
|
| 375 |
addUnTokenizedField(doc, FieldNames.REPOSITORY, getName()); |
|
| 361 |
//The given entry is a file. |
|
| 362 |
//This means we will get every file from the repository.... |
|
| 363 |
//Get only the properties of the file |
|
| 376 | 364 |
|
| 377 |
if (nodeKind == SVNNodeKind.NONE) {
|
|
| 378 |
LOGGER.debug("The " + entryPath.getPath() + " is a NONE entry.");
|
|
| 379 |
} else if (nodeKind == SVNNodeKind.DIR) {
|
|
| 380 |
//The given entry is a directory. |
|
| 381 |
LOGGER.debug("The " + entryPath.getPath() + " is a directory.");
|
|
| 382 |
//Here we need to call getDir to get directory properties. |
|
| 383 |
Collection<SVNDirEntry> dirEntries = null; |
|
| 384 |
getRepository().getRepository().getDir(entryPath.getPath(), logEntry.getRevision(), fileProperties, dirEntries); |
|
| 385 |
indexProperties(fileProperties, doc); |
|
| 365 |
entry.setSize(dirEntry.getSize()); |
|
| 366 |
getRepository().getRepository().getFile(entryPath.getPath(), logEntry.getRevision(), fileProperties, null); |
|
| 367 |
//indexProperties(fileProperties, doc); |
|
| 368 |
indexProperties(fileProperties, entry); |
|
| 386 | 369 |
|
| 387 |
} else if (nodeKind == SVNNodeKind.FILE) {
|
|
| 388 |
|
|
| 389 |
//The given entry is a file. |
|
| 390 |
//This means we will get every file from the repository.... |
|
| 391 |
//Get only the properties of the file |
|
| 370 |
IParse documentParser = new DocumentParser(); |
|
| 371 |
documentParser.parse(entry, getRepository(), dirEntry, entryPath.getPath(), logEntry.getRevision()); |
|
| 372 |
} |
|
| 392 | 373 |
|
| 393 |
addTokenizedField(doc, FieldNames.SIZE, Long.toString(dirEntry.getSize())); |
|
| 394 |
getRepository().getRepository().getFile(entryPath.getPath(), logEntry.getRevision(), fileProperties, null); |
|
| 395 |
indexProperties(fileProperties, doc); |
|
| 396 |
|
|
| 397 |
FileExtensionHandler feh = new FileExtensionHandler(); |
|
| 398 |
feh.setFileProperties(fileProperties); |
|
| 399 |
feh.setDoc(doc); |
|
| 400 |
feh.execute(getRepository(), dirEntry, entryPath.getPath(), logEntry.getRevision()); |
|
| 401 |
} |
|
| 402 |
|
|
| 403 |
indexWriter.addDocument(doc); |
|
| 404 |
LOGGER.debug("File " + entryPath.getPath() + " indexed...");
|
|
| 374 |
indexWriter.addResultEntry(entry); |
|
| 375 |
LOGGER.debug("File " + entryPath.getPath() + " indexed...");
|
|
| 405 | 376 |
} |
| 406 | 377 |
|
| 407 |
|
|
| 408 | 378 |
/** |
| 409 | 379 |
* This method will index only those properties which do not start |
| 410 | 380 |
* with {@link SVN_WC_PREFIX} nor with {@link SVN_ENTRY_PREFIX}.
|
| 411 | 381 |
* @param fileProperties |
| 412 | 382 |
* @param doc |
| 413 | 383 |
*/ |
| 414 |
private void indexProperties(SVNProperties fileProperties, Document doc) {
|
|
| 384 |
private void indexProperties(SVNProperties fileProperties, ResultEntry entry) {
|
|
| 415 | 385 |
SVNProperties list = fileProperties.getRegularProperties(); |
| 416 | 386 |
|
| 417 | 387 |
for (Iterator<String> iterator = list.nameSet().iterator(); iterator.hasNext();) {
|
| 418 | 388 |
String propname = (String) iterator.next(); |
| 419 | 389 |
LOGGER.debug("Indexing property: " + propname);
|
| 420 |
addUnTokenizedFieldNoStore(doc, propname, list.getStringValue(propname).toLowerCase()); |
|
| 421 |
addUnTokenizedField(doc, propname, list.getStringValue(propname)); |
|
| 390 |
entry.addProperty(propname, list.getStringValue(propname)); |
|
| 422 | 391 |
} |
| 423 | 392 |
} |
| 424 |
|
|
| 425 | 393 |
public long getStartRevision() {
|
| 426 | 394 |
return startRevision; |
| 427 | 395 |
} |
| src/main/java/com/soebes/supose/scan/ScanSingleRepository.java (Arbeitskopie) | ||
|---|---|---|
| 30 | 30 |
|
| 31 | 31 |
import org.apache.commons.io.FileUtils; |
| 32 | 32 |
import org.apache.log4j.Logger; |
| 33 |
import org.apache.lucene.analysis.Analyzer; |
|
| 34 |
import org.apache.lucene.index.CorruptIndexException; |
|
| 35 |
import org.apache.lucene.index.IndexWriter; |
|
| 36 | 33 |
import org.tmatesoft.svn.core.SVNAuthenticationException; |
| 37 | 34 |
import org.tmatesoft.svn.core.SVNException; |
| 38 | 35 |
import org.tmatesoft.svn.core.auth.ISVNAuthenticationManager; |
| ... | ... | |
| 40 | 37 |
import com.soebes.supose.cli.CLIChangeSetInterceptor; |
| 41 | 38 |
import com.soebes.supose.cli.CLIInterceptor; |
| 42 | 39 |
import com.soebes.supose.cli.CLILogEntryInterceptor; |
| 43 |
import com.soebes.supose.index.Index; |
|
| 44 |
import com.soebes.supose.index.IndexHelper; |
|
| 40 |
import com.soebes.supose.index.Writer; |
|
| 41 |
import com.soebes.supose.index.lucene.LuceneIndexWriter; |
|
| 45 | 42 |
import com.soebes.supose.repository.Repository; |
| 46 |
import com.soebes.supose.utility.AnalyzerFactory; |
|
| 47 | 43 |
|
| 48 | 44 |
public class ScanSingleRepository {
|
| 49 | 45 |
private static Logger LOGGER = Logger.getLogger(ScanSingleRepository.class); |
| 50 | 46 |
|
| 51 |
|
|
| 52 | 47 |
public static long scanFullRepository( |
| 53 | 48 |
String url, |
| 54 | 49 |
long fromRev, |
| 55 |
String indexDirectory, |
|
| 50 |
Writer writer, |
|
| 56 | 51 |
boolean create, |
| 57 | 52 |
ISVNAuthenticationManager authManager |
| 58 | 53 |
) throws SVNException {
|
| ... | ... | |
| 103 | 98 |
scanRepository.setStartRevision(startRevision); |
| 104 | 99 |
//We will scan the repository to the current HEAD of the repository. |
| 105 | 100 |
scanRepository.setEndRevision(endRevision); |
| 106 |
|
|