Changeset - b6c902d88472
[Not reviewed]
beta
0 7 0
Marcin Kuzminski - 14 years ago 2012-02-16 04:41:58
marcin@python-works.com
bumbed whoosh to 2.3.X series
- fixed some html issues on search results
7 files changed with 49 insertions and 33 deletions:
0 comments (0 inline, 0 general)
requires.txt
Show inline comments
 
@@ -2,13 +2,13 @@ Pylons==1.0.0
 
Beaker==1.6.2
 
WebHelpers>=1.2
 
formencode==1.2.4
 
SQLAlchemy==0.7.4
 
Mako==0.5.0
 
pygments>=1.4
 
whoosh<1.8
 
whoosh<2.4
 
celery>=2.2.5,<2.3
 
babel
 
python-dateutil>=1.5.0,<2.0.0
 
dulwich>=0.8.0,<0.9.0
 
vcs>=0.2.3.dev
 
webob==1.0.8
rhodecode/__init__.py
Show inline comments
 
@@ -41,13 +41,13 @@ requirements = [
 
    "Beaker==1.6.2",
 
    "WebHelpers>=1.2",
 
    "formencode==1.2.4",
 
    "SQLAlchemy==0.7.4",
 
    "Mako==0.5.0",
 
    "pygments>=1.4",
 
    "whoosh<1.8",
 
    "whoosh<2.4",
 
    "celery>=2.2.5,<2.3",
 
    "babel",
 
    "python-dateutil>=1.5.0,<2.0.0",
 
    "dulwich>=0.8.0,<0.9.0",
 
    "vcs>=0.2.3.dev",
 
    "webob==1.0.8",
rhodecode/controllers/search.py
Show inline comments
 
@@ -23,13 +23,13 @@
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
import logging
 
import traceback
 

	
 
from pylons.i18n.translation import _
 
from pylons import request, config, session, tmpl_context as c
 
from pylons import request, config, tmpl_context as c
 

	
 
from rhodecode.lib.auth import LoginRequired
 
from rhodecode.lib.base import BaseController, render
 
from rhodecode.lib.indexers import SCHEMA, IDX_NAME, ResultWrapper
 

	
 
from webhelpers.paginate import Page
 
@@ -73,13 +73,13 @@ class SearchController(BaseController):
 

	
 
                qp = QueryParser(search_type, schema=SCHEMA)
 
                if c.repo_name:
 
                    cur_query = u'repository:%s %s' % (c.repo_name, cur_query)
 
                try:
 
                    query = qp.parse(unicode(cur_query))
 

	
 
                    # extract words for highlight
 
                    if isinstance(query, Phrase):
 
                        highlight_items.update(query.words)
 
                    elif isinstance(query, Prefix):
 
                        highlight_items.add(query.text)
 
                    else:
 
                        for i in query.all_terms():
 
@@ -89,24 +89,28 @@ class SearchController(BaseController):
 
                    matcher = query.matcher(searcher)
 

	
 
                    log.debug(query)
 
                    log.debug(highlight_items)
 
                    results = searcher.search(query)
 
                    res_ln = len(results)
 
                    c.runtime = '%s results (%.3f seconds)' \
 
                        % (res_ln, results.runtime)
 
                    c.runtime = '%s results (%.3f seconds)' % (
 
                        res_ln, results.runtime
 
                    )
 

	
 
                    def url_generator(**kw):
 
                        return update_params("?q=%s&type=%s" \
 
                                           % (c.cur_query, c.cur_search), **kw)
 

	
 
                    c.formated_results = Page(
 
                                ResultWrapper(search_type, searcher, matcher,
 
                                              highlight_items),
 
                                page=p, item_count=res_ln,
 
                                items_per_page=10, url=url_generator)
 
                        page=p,
 
                        item_count=res_ln,
 
                        items_per_page=10,
 
                        url=url_generator
 
                    )
 

	
 
                except QueryParserError:
 
                    c.runtime = _('Invalid search query. Try quoting it.')
 
                searcher.close()
 
            except (EmptyIndexError, IOError):
 
                log.error(traceback.format_exc())
 
@@ -114,8 +118,9 @@ class SearchController(BaseController):
 
                c.runtime = _('There is no index to search in. '
 
                              'Please run whoosh indexer')
 
            except (Exception):
 
                log.error(traceback.format_exc())
 
                c.runtime = _('An error occurred during this search operation')
 

	
 

	
 
        # Return a rendered template
 
        return render('/search/search.html')
rhodecode/lib/indexers/__init__.py
Show inline comments
 
@@ -34,44 +34,45 @@ from string import strip
 
from shutil import rmtree
 

	
 
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 
from whoosh.index import create_in, open_dir
 
from whoosh.formats import Characters
 
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
 
from whoosh.highlight import highlight, HtmlFormatter, ContextFragmenter
 

	
 
from webhelpers.html.builder import escape
 
from sqlalchemy import engine_from_config
 
from vcs.utils.lazy import LazyProperty
 

	
 
from rhodecode.model import init_model
 
from rhodecode.model.scm import ScmModel
 
from rhodecode.model.repo import RepoModel
 
from rhodecode.config.environment import load_environment
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
 
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, LazyProperty
 
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
 

	
 
#EXTENSIONS WE WANT TO INDEX CONTENT OFF
 
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
 

	
 
#CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 

	
 

	
 
#INDEX SCHEMA DEFINITION
 
SCHEMA = Schema(owner=TEXT(),
 
SCHEMA = Schema(
 
    owner=TEXT(),
 
                repository=TEXT(stored=True),
 
                path=TEXT(stored=True),
 
                content=FieldType(format=Characters(ANALYZER),
 
    content=FieldType(format=Characters(), analyzer=ANALYZER,
 
                             scorable=True, stored=True),
 
                modtime=STORED(), extension=TEXT(stored=True))
 

	
 
    modtime=STORED(),
 
    extension=TEXT(stored=True)
 
)
 

	
 
IDX_NAME = 'HG_INDEX'
 
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
 
FRAGMENTER = SimpleFragmenter(200)
 
FRAGMENTER = ContextFragmenter(200)
 

	
 

	
 
class MakeIndex(BasePasterCommand):
 

	
 
    max_args = 1
 
    min_args = 1
 
@@ -133,13 +134,13 @@ class MakeIndex(BasePasterCommand):
 
class ResultWrapper(object):
 
    def __init__(self, search_type, searcher, matcher, highlight_items):
 
        self.search_type = search_type
 
        self.searcher = searcher
 
        self.matcher = matcher
 
        self.highlight_items = highlight_items
 
        self.fragment_size = 200 / 2
 
        self.fragment_size = 200
 

	
 
    @LazyProperty
 
    def doc_ids(self):
 
        docs_id = []
 
        while self.matcher.is_active():
 
            docnum = self.matcher.id()
 
@@ -169,16 +170,16 @@ class ResultWrapper(object):
 
    def __getitem__(self, key):
 
        """
 
        Slicing of resultWrapper
 
        """
 
        i, j = key.start, key.stop
 

	
 
        slice = []
 
        slices = []
 
        for docid in self.doc_ids[i:j]:
 
            slice.append(self.get_full_content(docid))
 
        return slice
 
            slices.append(self.get_full_content(docid))
 
        return slices
 

	
 
    def get_full_content(self, docid):
 
        res = self.searcher.stored_fields(docid[0])
 
        f_path = res['path'][res['path'].find(res['repository']) \
 
                             + len(res['repository']):].lstrip('/')
 

	
 
@@ -214,13 +215,15 @@ class ResultWrapper(object):
 
            memory.append((start_offseted, end_offseted,))
 
            yield (start_offseted, end_offseted,)
 

	
 
    def highlight(self, content, top=5):
 
        if self.search_type != 'content':
 
            return ''
 
        hl = highlight(escape(content),
 
                 self.highlight_items,
 
        hl = highlight(
 
            text=escape(content),
 
            terms=self.highlight_items,
 
                 analyzer=ANALYZER,
 
                 fragmenter=FRAGMENTER,
 
                 formatter=FORMATTER,
 
                 top=top)
 
            top=top
 
        )
 
        return hl
rhodecode/lib/indexers/daemon.py
Show inline comments
 
@@ -46,13 +46,12 @@ from rhodecode.lib.indexers import INDEX
 
from vcs.exceptions import ChangesetError, RepositoryError, \
 
    NodeDoesNotExistError
 

	
 
from whoosh.index import create_in, open_dir
 

	
 

	
 

	
 
log = logging.getLogger('whooshIndexer')
 
# create logger
 
log.setLevel(logging.DEBUG)
 
log.propagate = False
 
# create console handler and set level to debug
 
ch = logging.StreamHandler()
 
@@ -65,18 +64,19 @@ formatter = logging.Formatter("%(asctime
 
# add formatter to ch
 
ch.setFormatter(formatter)
 

	
 
# add ch to logger
 
log.addHandler(ch)
 

	
 

	
 
class WhooshIndexingDaemon(object):
 
    """
 
    Daemon for atomic jobs
 
    """
 

	
 
    def __init__(self, indexname='HG_INDEX', index_location=None,
 
    def __init__(self, indexname=IDX_NAME, index_location=None,
 
                 repo_location=None, sa=None, repo_list=None):
 
        self.indexname = indexname
 

	
 
        self.index_location = index_location
 
        if not index_location:
 
            raise Exception('You have to provide index location')
 
@@ -92,13 +92,12 @@ class WhooshIndexingDaemon(object):
 
            for repo_name, repo in self.repo_paths.items():
 
                if repo_name in repo_list:
 
                    filtered_repo_paths[repo_name] = repo
 

	
 
            self.repo_paths = filtered_repo_paths
 

	
 

	
 
        self.initial = False
 
        if not os.path.isdir(self.index_location):
 
            os.makedirs(self.index_location)
 
            log.info('Cannot run incremental index since it does not'
 
                     ' yet exist running full build')
 
            self.initial = True
 
@@ -152,13 +151,12 @@ class WhooshIndexingDaemon(object):
 
                        repository=safe_unicode(repo_name),
 
                        path=safe_unicode(path),
 
                        content=u_content,
 
                        modtime=self.get_node_mtime(node),
 
                        extension=node.extension)
 

	
 

	
 
    def build_index(self):
 
        if os.path.exists(self.index_location):
 
            log.debug('removing previous index')
 
            rmtree(self.index_location)
 

	
 
        if not os.path.exists(self.index_location):
 
@@ -174,13 +172,12 @@ class WhooshIndexingDaemon(object):
 
                self.add_doc(writer, idx_path, repo, repo_name)
 

	
 
        log.debug('>> COMMITING CHANGES <<')
 
        writer.commit(merge=True)
 
        log.debug('>>> FINISHED BUILDING INDEX <<<')
 

	
 

	
 
    def update_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE')
 

	
 
        idx = open_dir(self.index_location, indexname=self.indexname)
 
        # The set of all paths in the index
 
        indexed_paths = set()
rhodecode/public/css/pygments.css
Show inline comments
 
@@ -62,16 +62,26 @@ div.codeblock .code-body table{
 
div.codeblock .code-body table td {
 
	border: 0px !important;
 
}
 
div.code-body {
 
	background-color: #FFFFFF;
 
}
 
div.code-body pre .match{
 

	
 
div.codeblock .code-header .search-path {
 
	padding: 0px 0px 0px 10px;
 
}
 

	
 
div.search-code-body {
 
    background-color: #FFFFFF;
 
    padding: 5px 0px 5px 10px;
 
}
 

	
 
div.search-code-body pre .match{
 
	background-color: #FAFFA6;
 
}
 
div.code-body pre .break{
 
div.search-code-body pre .break{
 
	background-color: #DDE7EF;
 
	width: 100%;
 
	color: #747474;
 
	display: block;
 
	
 
}
rhodecode/templates/search/search_content.html
Show inline comments
 
@@ -2,16 +2,17 @@
 

	
 
%for cnt,sr in enumerate(c.formated_results):
 
    %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
 
    <div class="table">
 
        <div id="body${cnt}" class="codeblock">
 
            <div class="code-header">
 
                <div class="revision">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
 
                h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div>
 
                <div class="search-path">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
 
                h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}
 
            </div>
 
            <div class="code-body">
 
            </div>
 
            <div class="search-code-body">
 
                <pre>${h.literal(sr['content_short_hl'])}</pre>
 
            </div>
 
        </div>
 
    </div>
 
    %else:
 
        %if cnt == 0:
0 comments (0 inline, 0 general)