mò *á¸Ec@s¯dklZdkZdkZdkZdklZlZlZdkl Z dk l Z dk l Z dklZlZdklZdklZd efd „ƒYZdS( (sLexiconN(sIISetsunions IITreeSet(sOIBTree(sIOBTree(sOOBTree(sOrsOp(srandid(s UnicodeTypetGlobbingLexiconcBs¡tZdZdZdZdZeed„Zd„Zdd„Z d„Z d „Z e Z d „Z d „Zd „Zd „Zd„Zedd„Zd„ZRS(s Lexicon which supports basic globbing function ('*' and '?'). This lexicon keeps several data structures around that are useful for searching. They are: '_lexicon' -- Contains the mapping from word => word_id '_inverseLex' -- Contains the mapping from word_id => word '_digrams' -- Contains a mapping from digram => word_id Before going further, it is necessary to understand what a digram is, as it is a core component of the structure of this lexicon. A digram is a two-letter sequence in a word. For example, the word 'zope' would be converted into the digrams:: ['$z', 'zo', 'op', 'pe', 'e$'] where the '$' is a word marker. It is used at the beginning and end of the words. Those digrams are significant. t*t?t$cCs5|iƒ||_||_ti|iƒ|_dS(N(tselftcleart useSplittertextratsplitterParamstSplittert getSplittert SplitterFunc(RRR((tO/data/zmath/zope/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.pyt__init__:s   cCs(tƒ|_tƒ|_tƒ|_dS(N(tOIBTreeRt_lexicontIOBTreet _inverseLextOOBTreet_digrams(R((R R@s  iÈcCsyti||ƒt|iƒtjodSndkl}|i}tƒ|_|i |i_ |||i|t ƒdS(N(sconvert( tLexicont_convertBTreesRt thresholdttypeRRtBTrees.converttconvertt_p_jart IITreeSet(RRRR((R REs   cCsHd|d}g}tt|ƒdƒD]}||||d!q)~S(s3Returns a list with the set of digrams in the word.RiiN(twordt_[1]trangetlenti(RRR R((R t createDigramsQscCs3|ii|ƒo|i|Sn|i|ƒSdS(s5Provided 'word', return the matching integer word id.N(RRthas_keyRt assignWordId(RR((R t getWordIdXscCs|ii|dƒS(N(RRtgettwidtNone(RR&((R tgetWordbscCs(|ii|ƒo|i|Sn|i}y |i}WnNtj oB|o|iƒdd}nt ƒ|_d}||||i||d|d|iid|iid|iiƒSWn|i||ƒSnXdS(s wrap the splitter tencodingt singlechart indexnumberst casefoldingN( RR tastringR@RPRtsplitterSingleCharstsplitterIndexNumberstsplitterCasefolding(RRTR@RP((R R æs   cCst|tƒp+tiddƒ}ti||dƒ}n7h}xdD]}d|t |ƒ.Rs.*Rt.s%s$N( RJtpatt UnicodeTypetstringt maketranst transTablet translateR:tchR'tordtreplace(RRYR]R_R:((R R>ùs  (t__name__t __module__t__doc__R2R3R7R'R RRR!R$R/R(R#R%RERKR R>(((R Rs"      & ;  (RR R<R[tBTrees.IIBTreeR0R;RtBTrees.OIBTreeRtBTrees.IOBTreeRtBTrees.OOBTreeRt*Products.PluginIndexes.TextIndex.TextIndexRNRLt$Products.PluginIndexes.common.randidR-ttypesRZR(R RR[R;R0R-R<RZRRRRLRNRR((R Rs