##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Keyword index

$Id$
"""
from persistent import Persistent

from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree, OOSet, difference 
from BTrees.IIBTree import IISet, union, intersection
from BTrees.Length import Length

from types import StringTypes
from zope.index.interfaces import IInjection, IStatistics
from zope.index.keyword.interfaces import IKeywordQuerying
from zope.interface import implements

class KeywordIndex(Persistent):
    """ A case-insensitive keyword index """

    normalize = True
    implements(IInjection, IStatistics, IKeywordQuerying)

    def __init__(self):
        self.clear()

    def clear(self):
        """Initialize forward and reverse mappings."""

        # The forward index maps index keywords to a sequence of docids
        self._fwd_index = OOBTree()

        # The reverse index maps a docid to its keywords
        # TODO: Using a vocabulary might be the better choice to store
        # keywords since it would allow use to use integers instead of strings
        self._rev_index = IOBTree()
        self._num_docs = Length(0)

    def documentCount(self):
        """Return the number of documents in the index."""
        return self._num_docs()

    def wordCount(self):
        """Return the number of indexed words"""
        return len(self._fwd_index)

    def has_doc(self, docid):
        return bool(self._rev_index.has_key(docid))

    def index_doc(self, docid, seq):
        if isinstance(seq, StringTypes):
            raise TypeError('seq argument must be a list/tuple of strings')
    
        if not seq:
            return

        if self.normalize:
            seq = [w.lower() for w in seq]

        old_kw = self._rev_index.get(docid, None)
        new_kw = OOSet(seq)

        if old_kw is None:
            self._insert_forward(docid, new_kw)
            self._insert_reverse(docid, new_kw)
            self._num_docs.change(1)
        else:

            # determine added and removed keywords
            kw_added = difference(new_kw, old_kw)
            kw_removed = difference(old_kw, new_kw)

            # removed keywords are removed from the forward index
            for word in kw_removed:
                self._fwd_index[word].remove(docid)
            
            # now update reverse and forward indexes
            self._insert_forward(docid, kw_added)
            self._insert_reverse(docid, new_kw)
        
    def unindex_doc(self, docid):
        idx  = self._fwd_index

        try:
            for word in self._rev_index[docid]:
                idx[word].remove(docid)
                if not idx[word]:
                    del idx[word] 
        except KeyError:
            return
        
        try:
            del self._rev_index[docid]
        except KeyError:
            pass

        self._num_docs.change(-1)

    def _insert_forward(self, docid, words):
        """insert a sequence of words into the forward index """

        idx = self._fwd_index
        has_key = idx.has_key
        for word in words:
            if not has_key(word):
                idx[word] = IISet()
            idx[word].insert(docid)

    def _insert_reverse(self, docid, words):
        """ add words to forward index """

        if words:
            self._rev_index[docid] = words

    def search(self, query, operator='and'):
        """Execute a search given by 'query'."""
        if isinstance(query, StringTypes):
            query = [query]

        if self.normalize:
            query = [w.lower() for w in query]

        f = {'and' : intersection, 'or' : union}[operator]
    
        rs = None
        for word in query:
            docids = self._fwd_index.get(word, IISet())
            rs = f(rs, docids)
            
        if rs:
            return rs
        else:
            return IISet()

class CaseSensitiveKeywordIndex(KeywordIndex):
    """ A case-sensitive keyword index """
    normalize = False