Changeset - 65b2f150beb7
[Not reviewed]
default
0 4 4
Marcin Kuzminski - 15 years ago 2010-10-07 17:32:24
marcin@python-works.com
Added searching for file names within the repository in rhodecode
8 files changed with 140 insertions and 41 deletions:
0 comments (0 inline, 0 general)
rhodecode/controllers/search.py
Show inline comments
 
@@ -4,98 +4,109 @@
 
# Copyright (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
 
# 
 
# This program is free software; you can redistribute it and/or
 
# modify it under the terms of the GNU General Public License
 
# as published by the Free Software Foundation; version 2
 
# of the License or (at your option) any later version of the license.
 
# 
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
# 
 
# You should have received a copy of the GNU General Public License
 
# along with this program; if not, write to the Free Software
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
# MA  02110-1301, USA.
 
"""
 
Created on Aug 7, 2010
 
search controller for pylons
 
@author: marcink
 
"""
 
from pylons import request, response, session, tmpl_context as c, url
 
from pylons.controllers.util import abort, redirect
 
from rhodecode.lib.auth import LoginRequired
 
from rhodecode.lib.base import BaseController, render
 
from rhodecode.lib.indexers import IDX_LOCATION, SCHEMA, IDX_NAME, ResultWrapper
 
from webhelpers.paginate import Page
 
from webhelpers.util import update_params
 
from pylons.i18n.translation import _
 
from whoosh.index import open_dir, EmptyIndexError
 
from whoosh.qparser import QueryParser, QueryParserError
 
from whoosh.query import Phrase
 
import logging
 
import traceback
 

	
 
log = logging.getLogger(__name__)
 

	
 
class SearchController(BaseController):
    """
    Controller serving the search page backed by the whoosh index.
    """

    @LoginRequired()
    def __before__(self):
        super(SearchController, self).__before__()

    def index(self, search_repo=None):
        """
        Run a search against the whoosh index and render the results page.

        Reads ``q`` (query), ``type`` (what to search in) and ``page``
        from the request. Results are exposed to the template through
        ``c.formated_results``; ``c.runtime`` carries either the
        "N results (x seconds)" summary or a user readable error message.

        @param search_repo: optional repository name; when given the query
            is restricted to that repository.
        """
        c.repo_name = search_repo
        c.formated_results = []
        c.runtime = ''
        c.cur_query = request.GET.get('q', None)
        c.cur_type = request.GET.get('type', 'source')
        # map the requested type onto the schema field that gets searched,
        # unknown types fall back to searching file content
        c.cur_search = search_type = {'content': 'content',
                                      'commit': 'content',
                                      'path': 'path',
                                      'repository': 'repository'}\
                                      .get(c.cur_type, 'content')

        if c.cur_query:
            cur_query = c.cur_query.lower()

            # a malformed page number should not break the search page
            try:
                p = int(request.params.get('page', 1))
            except ValueError:
                p = 1

            highlight_items = set()
            try:
                idx = open_dir(IDX_LOCATION, indexname=IDX_NAME)
                searcher = idx.searcher()

                try:
                    qp = QueryParser(search_type, schema=SCHEMA)
                    if c.repo_name:
                        cur_query = u'repository:%s %s' % (c.repo_name,
                                                           cur_query)

                    query = qp.parse(unicode(cur_query))

                    # collect the terms that should be highlighted in the
                    # content snippets
                    if isinstance(query, Phrase):
                        highlight_items.update(query.words)
                    else:
                        for i in query.all_terms():
                            if i[0] == 'content':
                                highlight_items.add(i[1])

                    matcher = query.matcher(searcher)

                    log.debug(query)
                    log.debug(highlight_items)
                    results = searcher.search(query)
                    res_ln = len(results)
                    c.runtime = '%s results (%.3f seconds)' \
                        % (res_ln, results.runtime)

                    def url_generator(**kw):
                        # NOTE(review): the query is interpolated unquoted,
                        # characters like '&' or '#' in it may break the
                        # pager links - confirm how update_params encodes.
                        return update_params("?q=%s&type=%s" \
                                           % (c.cur_query, c.cur_search), **kw)

                    c.formated_results = Page(
                                ResultWrapper(search_type, searcher, matcher,
                                              highlight_items),
                                page=p, item_count=res_ln,
                                items_per_page=10, url=url_generator)

                except QueryParserError:
                    c.runtime = _('Invalid search query. Try quoting it.')
                finally:
                    # always release the searcher, also on unexpected errors
                    searcher.close()
            except (EmptyIndexError, IOError):
                log.error(traceback.format_exc())
                log.error('Empty Index data')
                c.runtime = _('There is no index to search in. Please run whoosh indexer')

        # Return a rendered template
        return render('/search/search.html')
rhodecode/lib/indexers/__init__.py
Show inline comments
 
from os.path import dirname as dn, join as jn
 
from rhodecode.config.environment import load_environment
 
from rhodecode.model.hg_model import HgModel
 
from shutil import rmtree
 
from webhelpers.html.builder import escape
 
from vcs.utils.lazy import LazyProperty
 

	
 
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
 
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
 
from whoosh.index import create_in, open_dir
 
from whoosh.formats import Characters
 
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter   
 

	
 
import os
 
import sys
 
import traceback
 

	
 
#to get the rhodecode import
 
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
 

	
 

	
 
#LOCATION WE KEEP THE INDEX
 
IDX_LOCATION = jn(dn(dn(dn(dn(os.path.abspath(__file__))))), 'data', 'index')
 

	
 
#EXTENSIONS WE WANT TO INDEX CONTENT OF
 
INDEX_EXTENSIONS = ['action', 'adp', 'ashx', 'asmx', 'aspx', 'asx', 'axd', 'c',
 
                    'cfg', 'cfm', 'cpp', 'cs', 'css', 'diff', 'do', 'el', 'erl',
 
                    'h', 'htm', 'html', 'ini', 'java', 'js', 'jsp', 'jspx', 'lisp',
 
                    'lua', 'm', 'mako', 'ml', 'pas', 'patch', 'php', 'php3',
 
                    'php4', 'phtml', 'pm', 'py', 'rb', 'rst', 's', 'sh', 'sql',
 
                    'tpl', 'txt', 'vim', 'wss', 'xhtml', 'xml', 'xsl', 'xslt',
 
                    'yaws']
 

	
 
#CUSTOM ANALYZER wordsplit + lowercase filter
 
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
 

	
 

	
 
#INDEX SCHEMA DEFINITION
 
# ``path`` is a TEXT field so that file names can be searched;
# a keyword may only be passed once to Schema().
SCHEMA = Schema(owner=TEXT(),
                repository=TEXT(stored=True),
                path=TEXT(stored=True),
                content=FieldType(format=Characters(ANALYZER),
                             scorable=True, stored=True),
                modtime=STORED(), extension=TEXT(stored=True))
 

	
 

	
 
IDX_NAME = 'HG_INDEX'
 
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n') 
 
FRAGMENTER = SimpleFragmenter(200)
 
                            
 
class ResultWrapper(object):
    """
    Lazy, sliceable wrapper around the matches of a whoosh query.

    Document numbers and snippet offsets are collected once from the
    matcher (see ``doc_ids``); the stored fields and the highlighted
    snippet of a hit are only built when that hit is iterated or sliced.
    """

    def __init__(self, search_type, searcher, matcher, highlight_items):
        """
        @param search_type: name of the searched field, highlighting is
            only produced for ``'content'``
        @param searcher: whoosh searcher used to load stored fields
        @param matcher: whoosh matcher of the executed query
        @param highlight_items: terms to highlight in the snippets
        """
        self.search_type = search_type
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        # floor division keeps this an int (it is used as a slice offset)
        self.fragment_size = 200 // 2

    @LazyProperty
    def doc_ids(self):
        """
        List of ``[docnum, chunks]`` pairs for every matching document,
        where chunks are the offsets produced by ``get_chunks``.
        Walks the matcher until it is exhausted.
        """
        docs_id = []
        while self.matcher.is_active():
            docnum = self.matcher.id()
            chunks = list(self.get_chunks())
            docs_id.append([docnum, chunks])
            self.matcher.next()
        return docs_id

    def __str__(self):
        return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))

    def __repr__(self):
        return self.__str__()

    def __len__(self):
        return len(self.doc_ids)

    def __iter__(self):
        """
        Allows iteration over results, content of each hit is
        generated lazily.
        """
        for docid in self.doc_ids:
            yield self.get_full_content(docid)

    def __getslice__(self, i, j):
        """
        Slicing of resultWrapper, returns a list with full content
        of the hits ``i`` to ``j``.
        """
        return [self.get_full_content(docid) for docid in self.doc_ids[i:j]]

    def get_full_content(self, docid):
        """
        Load stored fields of a hit and extend them with ``content_short``,
        ``content_short_hl`` and ``f_path`` keys.

        @param docid: ``[docnum, chunks]`` pair as stored in ``doc_ids``
        """
        res = self.searcher.stored_fields(docid[0])
        path = res['path']
        repo = res['repository']

        # f_path is what follows the repository name inside the path;
        # when the name is not part of the path keep the whole path
        # instead of slicing from a bogus offset (find() returns -1)
        repo_pos = path.find(repo)
        if repo_pos == -1:
            f_path = path.lstrip('/')
        else:
            f_path = path[repo_pos + len(repo):].lstrip('/')

        content_short = self.get_short_content(res, docid[1])
        res.update({'content_short': content_short,
                    'content_short_hl': self.highlight(content_short),
                    'f_path': f_path})

        return res

    def get_short_content(self, res, chunks):
        """
        Join the parts of ``res['content']`` described by chunks.

        @param res: stored fields of a hit
        @param chunks: iterable of ``(start, end)`` offsets
        """
        return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])

    def get_chunks(self):
        """
        Generate ``(start, end)`` offsets around every span of the current
        match, each span is widened by ``fragment_size`` on both sides.
        A chunk never starts before the end of the previous one, so close
        occurrences are not highlighted twice.
        """
        last_end = 0
        for span in self.matcher.spans():
            start = span.startchar or 0
            end = span.endchar or 0
            start_offseted = max(0, start - self.fragment_size)
            end_offseted = end + self.fragment_size

            if start_offseted < last_end:
                start_offseted = last_end
            last_end = end_offseted
            yield (start_offseted, end_offseted,)

    def highlight(self, content, top=5):
        """
        Return escaped content with the highlight terms marked up.
        Only content searches are highlighted, for every other search
        type an empty string is returned.

        @param content: text to highlight
        @param top: passed to the whoosh ``highlight`` function
        """
        if self.search_type != 'content':
            return ''
        hl = highlight(escape(content),
                 self.highlight_items,
                 analyzer=ANALYZER,
                 fragmenter=FRAGMENTER,
                 formatter=FORMATTER,
                 top=top)
        return hl
rhodecode/public/css/style.css
Show inline comments
 
@@ -1273,96 +1273,102 @@ div.options a:hover
 
    height: 1%;
 
	clear: both;
 
	overflow: hidden;
 
    background: #E6EFC2;
 
    border: 1px solid #C6D880;
 
    color: #4e6100;
 
}
 
 
#content div.box div.message-success h6
 
{
 
    color: #4e6100;
 
}
 
 
/* -----------------------------------------------------------
 
	content -> right -> box / forms
 
----------------------------------------------------------- */
 
 
#content div.box div.form
 
{
 
	margin: 0;
 
	padding: 0 20px 10px 20px;
 
    clear: both;
 
    overflow: hidden;
 
}
 
 
#content div.box div.form div.fields
 
{
 
	margin: 0;
 
	padding: 0;
 
    clear: both;
 
    overflow: hidden;
 
}
 
 
#content div.box div.form div.fields div.field
 
{
 
	margin: 0;
 
	padding: 10px 0 10px 0; 
 
	height: 1%;
 
	border-bottom: 1px solid #DDDDDD;
 
	clear: both;
 
	overflow: hidden;
 
}
 
 
#content div.box div.form div.fields div.field-first
 
{
 
	padding: 0 0 10px 0; 
 
}
 
 
#content div.box div.form div.fields div.field-noborder
 
{
 
    border-bottom: 0px !important; 
 
}
 
 
 
#content div.box div.form div.fields div.field span.error-message
 
{
 
	margin: 8px 0 0 0;
 
	padding: 0;
 
	height: 1%;
 
	display: block;
 
	color: #FF0000;
 
}
 
 
#content div.box div.form div.fields div.field span.success
 
{
 
	margin: 8px 0 0 0;
 
	padding: 0;
 
	height: 1%;
 
	display: block;
 
	color: #316309;
 
}
 
 
/* -----------------------------------------------------------
 
	content -> right -> forms -> labels
 
----------------------------------------------------------- */
 
 
#content div.box div.form div.fields div.field div.label
 
{
 
	left: 310px;
 
	margin: 0;
 
	padding: 8px 0 0 5px;
 
	width: auto;
 
	position: absolute;
 
}
 
 
#content div.box-left div.form div.fields div.field div.label,
 
#content div.box-right div.form div.fields div.field div.label
 
{
 
    left: 0;
 
    margin: 0;
 
    padding: 0 0 8px 0;
 
    width: auto;
 
    position: relative;
 
    clear: both;
 
    overflow: hidden;
 
 
}
 
 
/* -----------------------------------------------------------
 
	content -> right -> forms -> label (select)
 
----------------------------------------------------------- */
 
 
@@ -3266,96 +3272,126 @@ div.browserblock .browser-header span {
 
}
 
 
div.browserblock .browser-body {
 
	background: #EEEEEE;
 
}
 
 
table.code-browser {
 
	border-collapse: collapse;
 
	width: 100%;
 
}
 
 
table.code-browser tr {
 
	margin: 3px;
 
}
 
 
/* header cells of the code browser table, left aligned with padding */
table.code-browser thead th {
	background-color: #EEEEEE;
	height: 20px;
	font-size: 1.1em;
	font-weight: bold;
	text-align: left;
	padding-left: 10px;
}
 
 
table.code-browser tbody tr {
 
	
 
}
 
 
table.code-browser tbody td {
 
	padding-left: 10px;
 
	height: 20px;
 
}
 
table.code-browser .browser-file {
 
	background: url("/images/icons/document_16.png") no-repeat scroll 3px;
 
	height: 16px;
 
	padding-left: 20px;
 
	text-align: left;
 
}
 
 
table.code-browser .browser-dir {
 
	background: url("/images/icons/folder_16.png") no-repeat scroll 3px;
 
	height: 16px;
 
	padding-left: 20px;
 
	text-align: left;
 
}
 
 
/* -----------------------------------------------------------
 
    SEARCH
 
----------------------------------------------------------- */
 
 
.box .search {
 
	clear:both;
 
	margin:0;
 
	overflow:hidden;
 
	padding:0 20px 10px;
 
}
 
.box .search div.search_path{
 
    background:none repeat scroll 0 0 #EEEEEE;
 
    border:1px solid #CCCCCC;
 
 
    color:blue;
 
    padding:10px 0;
 
    margin-bottom:10px;
 
}
 
.box .search div.search_path div.link{
 
	font-weight:bold;
 
	margin-left:25px;
 
}
 
.box .search div.search_path div.link a{
 
	color:#0066CC;
 
	cursor:pointer;
 
	text-decoration:none;
 
}
 
 
 
 
/* -----------------------------------------------------------
 
	ADMIN - SETTINGS
 
----------------------------------------------------------- */
 
#path_unlock{
 
	color: red;
 
	font-size: 1.2em;
 
	padding-left: 4px;
 
}
 
 
/* -----------------------------------------------------------
 
    INFOBOX
 
----------------------------------------------------------- */
 
/* every element inside the info box is rendered as an inline pill */
.info_box *{
	background:url("../../images/pager.png") repeat-x scroll 0 0 #EBEBEB;
	border-color:#DEDEDE #C4C4C4 #C4C4C4 #CFCFCF;
	border-style:solid;
	border-width:1px;
	color:#4A4A4A;
	font-weight:bold;
	height:1%;
	padding:4px 6px;
	display: inline;
}
 
.info_box span{
 
    margin-left:3px;
 
    margin-right:3px;
 
}
 
.info_box input#at_rev {
 
	padding:5px 3px 3px 2px;
 
	text-align:center;
 
}
 
.info_box input#view {
 
	padding:4px 3px 2px 2px;
 
	text-align:center;
 
}
 
/* -----------------------------------------------------------
 
    YUI TOOLTIP
 
----------------------------------------------------------- */
 
.yui-overlay,.yui-panel-container {
 
    visibility: hidden;
 
    position: absolute;
 
    z-index: 2;
 
}
 
 
.yui-tt {
 
    visibility: hidden;
 
    position: absolute;
 
    color: #666666;
rhodecode/templates/search/search.html
Show inline comments
 
## -*- coding: utf-8 -*-
 
<%inherit file="/base/base.html"/>
 
<%def name="title()">
 
   ${_('Search')} 
 
	%if c.repo_name:
 
		${_('in repository: ') + c.repo_name}
 
	%else:
 
		${_('in all repositories')}		
 
	%endif
 
	:${c.cur_query}
 
</%def>
 
<%def name="breadcrumbs()">
 
	${c.rhodecode_name}
 
</%def>
 
<%def name="page_nav()">
 
	${self.menu('home')}
 
</%def>
 
<%def name="main()">
 

	
 
<div class="box">
 
	<!-- box / title -->
 
	<div class="title">
 
		<h5>${_('Search')}
 
		%if c.repo_name:
 
			${_('in repository: ') + c.repo_name}
 
		%else:
 
			${_('in all repositories')}
 
		%endif		
 
		</h5>
 
	</div>
 
	<!-- end box / title -->
 
	%if c.repo_name:
 
		${h.form(h.url('search_repo',search_repo=c.repo_name),method='get')}	
 
	%else:
 
		${h.form(h.url('search'),method='get')}
 
	%endif
 
	<div class="form">
 
		<div class="fields">
 
			<div class="field field-first field-noborder">
 
             <div class="label">
 
                 <label for="q">${_('Search term')}</label>
 
             </div> 			
 
				<div class="input">${h.text('q',c.cur_query,class_="small")}</div>
 
				<div class="button highlight">
 
					<input type="submit" value="${_('Search')}" class="ui-button ui-widget ui-state-default ui-corner-all"/>
 
				</div>
 
				<div style="font-weight: bold;clear:Both;margin-left:200px">${c.runtime}</div>		
 
			</div>
 
		
 
			<div class="field ">
 
				<div class="label">
 
					<label for="q">${_('Search')}:</label>
 
	                <label for="type">${_('Search in')}</label>
 
				</div>
 
				<div class="input">
 
					${h.text('q',c.cur_query,class_="small")}
 
					<div class="button highlight">
 
						<input type="submit" value="${_('Search')}" class="ui-button ui-widget ui-state-default ui-corner-all"/>
 
					</div>		
 
					<div style="font-weight: bold;clear:both;padding: 5px">${c.runtime}</div>			
 
                <div class="select">
 
                    ${h.select('type',c.cur_type,[('content',_('Source codes')),
 
                        ##('commit',_('Commit messages')),
 
                        ('path',_('File names')),
 
                        ##('repository',_('Repository names')),
 
                        ])} 
 
				</div>
 
			</div>
 
			             
 
		</div>
 
	</div>
 
	${h.end_form()}
 
	
 
	%for cnt,sr in enumerate(c.formated_results):
 
		%if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
 
		<div class="table">
 
			<div id="body${cnt}" class="codeblock">
 
				<div class="code-header">
 
					<div class="revision">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
 
					h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div>
 
				</div>
 
				<div class="code-body">
 
					<pre>${h.literal(sr['content_short_hl'])}</pre>
 
				</div>
 
			</div>
 
		</div>
 
		%else:
 
			%if cnt == 0:
 
			<div class="table">
 
				<div id="body${cnt}" class="codeblock">
 
					<div class="error">${_('Permission denied')}</div>
 
				</div>
 
			</div>		
 
			%endif
 
			
 
		%endif		
 
	%endfor
 
	%if c.cur_query:
 
	<div class="pagination-wh pagination-left">
 
		${c.formated_results.pager('$link_previous ~2~ $link_next')}
 
	</div>	
 
    %if c.cur_search == 'content':
 
        <%include file='search_content.html'/>
 
    %elif c.cur_search == 'path':
 
        <%include file='search_path.html'/>
 
    %elif c.cur_search == 'commit':
 
        <%include file='search_commit.html'/>
 
    %elif c.cur_search == 'repository':
 
        <%include file='search_repository.html'/>
 
	%endif
 
</div>
 

	
 
</%def>    
rhodecode/templates/search/search_commit.html
Show inline comments
 
new file 100644
rhodecode/templates/search/search_content.html
Show inline comments
 
new file 100644
 
##content highlighting
 

	
 
%for cnt,sr in enumerate(c.formated_results):
 
    %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
 
    <div class="table">
 
        <div id="body${cnt}" class="codeblock">
 
            <div class="code-header">
 
                <div class="revision">${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
 
                h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}</div>
 
            </div>
 
            <div class="code-body">
 
                <pre>${h.literal(sr['content_short_hl'])}</pre>
 
            </div>
 
        </div>
 
    </div>
 
    %else:
 
        %if cnt == 0:
 
        <div class="table">
 
            <div id="body${cnt}" class="codeblock">
 
                <div class="error">${_('Permission denied')}</div>
 
            </div>
 
        </div>      
 
        %endif
 
        
 
    %endif      
 
%endfor
 
%if c.cur_query and c.formated_results:
 
<div class="pagination-wh pagination-left">
 
    ${c.formated_results.pager('$link_previous ~2~ $link_next')}
 
</div>  
 
%endif
 
\ No newline at end of file
rhodecode/templates/search/search_path.html
Show inline comments
 
new file 100644
 
##path search
 
<div class="search">
 
	%for cnt,sr in enumerate(c.formated_results):
 
	    %if h.HasRepoPermissionAny('repository.write','repository.read','repository.admin')(sr['repository'],'search results check'):
 
		    <div class="search_path">
 
		        <div class="link">
 
		            ${h.link_to(h.literal('%s &raquo; %s' % (sr['repository'],sr['f_path'])),
 
		                h.url('files_home',repo_name=sr['repository'],revision='tip',f_path=sr['f_path']))}        
 
		        </div>
 
		    </div>
 
	    %else:
 
	        %if cnt == 0:
 
			    <div class="error">
 
			        <div class="link">
 
			            ${_('Permission denied')}        
 
			        </div>
 
			    </div>        
 
	        %endif
 
	        
 
	    %endif      
 
	%endfor
 
	%if c.cur_query and c.formated_results:
 
	<div class="pagination-wh pagination-left">
 
	    ${c.formated_results.pager('$link_previous ~2~ $link_next')}
 
	</div>  
 
	%endif
 
</div>
 
\ No newline at end of file
rhodecode/templates/search/search_repository.html
Show inline comments
 
new file 100644
0 comments (0 inline, 0 general)