#! /usr/bin/env python -- # -*- python -*- ############################################################################## # # Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved. # # This software is subject to the provisions of the Zope Public License, # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS # FOR A PARTICULAR PURPOSE # ############################################################################## '''Structured Text Manipulation Parse a structured text string into a form that can be used with structured formats, like html. Structured text is text that uses indentation and simple symbology to indicate the structure of a document. A structured string consists of a sequence of paragraphs separated by one or more blank lines. Each paragraph has a level which is defined as the minimum indentation of the paragraph. A paragraph is a sub-paragraph of another paragraph if the other paragraph is the last preceding paragraph that has a lower level. Special symbology is used to indicate special constructs: - A single-line paragraph whose immediately succeeding paragraphs are lower level is treated as a header. - A paragraph that begins with a '-', '*', or 'o' is treated as an unordered list (bullet) element. - A paragraph that begins with a sequence of digits followed by a white-space character is treated as an ordered list element. - A paragraph that begins with a sequence of sequences, where each sequence is a sequence of digits or a sequence of letters followed by a period, is treated as an ordered list element. - A paragraph with a first line that contains some text, followed by some white-space and '--' is treated as a descriptive list element. The leading text is treated as the element title. 
- Sub-paragraphs of a paragraph that ends in the word 'example' or the word 'examples', or '::' are treated as example code and are output as is. - Text enclosed in single quotes (with white-space to the left of the first quote and whitespace or punctuation to the right of the second quote) is treated as example code. - Text surrounded by '*' characters (with white-space to the left of the first '*' and whitespace or punctuation to the right of the second '*') is emphasized. - Text surrounded by '**' characters (with white-space to the left of the first '**' and whitespace or punctuation to the right of the second '**') is made strong. - Text surrounded by '_' underscore characters (with whitespace to the left and whitespace or punctuation to the right) is made underlined. - Text enclosed by double quotes followed by a colon, a URL, and concluded by punctuation plus white space, *or* just white space, is treated as a hyper link. For example: "Zope":http://www.zope.org/ is ... Is interpreted as 'Zope is ....' Note: This works for relative as well as absolute URLs. - Text enclosed by double quotes followed by a comma, one or more spaces, an absolute URL and concluded by punctuation plus white space, or just white space, is treated as a hyper link. For example: "mail me", mailto:amos@digicool.com. Is interpreted as 'mail me.' - Text enclosed in brackets which consists only of letters, digits, underscores and dashes is treated as a hyper link within the document. For example: As demonstrated by Smith [12] this technique is quite effective. Is interpreted as '... by Smith [12] this ...'. Together with the next rule this allows easy coding of references or end notes. - Text enclosed in brackets which is preceded by the start of a line, two periods and a space is treated as a named link. For example: .. [12] "Effective Techniques" Smith, Joe ... Is interpreted as '[12] "Effective Techniques" ...'. 
Together with the previous rule this allows easy coding of references or end notes. - A paragraph that has blocks of text enclosed in '||' is treated as a table. The text blocks correspond to table cells and table rows are denoted by newlines. By default the cells are center aligned. A cell can span more than one column by preceding a block of text with an equivalent number of cell separators '||'. Newlines and '|' cannot be a part of the cell text. For example: |||| **Ingredients** || || *Name* || *Amount* || ||Spam||10|| ||Eggs||3|| is interpreted as::
Ingredients
Name Amount
Spam 10
Eggs 3
''' import warnings warnings.warn('The StructuredText package is deprecated and will be removed ' 'in Zope 2.12. Use zope.structuredtext instead.', DeprecationWarning, stacklevel=2) import ts_regex import string, re def untabify(aString, indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group, ): '''\ Convert indentation tabs to spaces. ''' result='' rest=aString while 1: ts_results = indent_tab(rest, (1,2)) if ts_results: start, grps = ts_results lnl=len(grps[0]) indent=len(grps[1]) result=result+rest[:start] rest="\n%s%s" % (' ' * ((indent/8+1)*8), rest[start+indent+1+lnl:]) else: return result+rest def indent(aString, indent=2): """Indent a string the given number of spaces""" r=untabify(aString).split('\n') if not r: return '' if not r[-1]: del r[-1] tab=' '*indent return "%s%s\n" % (tab,('\n'+tab).join(r)) def reindent(aString, indent=2, already_untabified=0): "reindent a block of text, so that the minimum indent is as given" if not already_untabified: aString=untabify(aString) l=indent_level(aString)[0] if indent==l: return aString r=[] append=r.append if indent > l: tab=' ' * (indent-l) for s in aString.split('\n'): append(tab+s) else: l=l-indent for s in aString.split('\n'): append(s[l:]) return '\n'.join(r) def indent_level(aString, indent_space=ts_regex.compile('\n\( *\)').search_group, ): '''\ Find the minimum indentation for a string, not counting blank lines. 
''' start=0 text='\n'+aString indent=l=len(text) while 1: ts_results = indent_space(text, (1,2), start) if ts_results: start, grps = ts_results i=len(grps[0]) start=start+i+1 if start < l and text[start] != '\n': # Skip blank lines if not i: return (0,aString) if i < indent: indent = i else: return (indent,aString) def paragraphs(list,start): l=len(list) level=list[start][0] i=start+1 while i < l and list[i][0] > level: i=i+1 return i-1-start def structure(list): if not list: return [] i=0 l=len(list) r=[] while i < l: sublen=paragraphs(list,i) i=i+1 r.append((list[i-1][1],structure(list[i:i+sublen]))) i=i+sublen return r class Table: CELL=' %s\n' ROW=' \n%s \n' TABLE='\n\n%s
' def create(self,aPar, td_reg=re.compile(r'[ \t\n]*\|\|([^\0|]*)') ): '''parses a table and returns nested list representing the table''' self.table=[] text=filter(None,aPar.split('\n')) for line in text: row=[] while 1: mo = td_reg.match(line) if not mo: return 0 pos = mo.end(1) row.append(mo.group(1)) if pos==len(line):break line=line[pos:] self.table.append(row) return 1 def html(self): '''Creates an HTML representation of table''' htmltable=[] for row in self.table: htmlrow=[] colspan=1 for cell in row: if cell=='': colspan=colspan+1 continue else: htmlrow.append(self.CELL%(colspan,cell)) colspan=1 htmltable.append(self.ROW % ''.join(htmlrow)) return self.TABLE % ''.join(htmltable) table=Table() class StructuredText: """Model text as structured collection of paragraphs. Structure is implied by the indentation level. This class is intended as a base classes that do actual text output formatting. """ def __init__(self, aStructuredString, level=0, paragraph_divider=ts_regex.compile('\(\r?\n *\)+\r?\n'), ): '''Convert a structured text string into a structured text object. Aguments: aStructuredString -- The string to be parsed. level -- The level of top level headings to be created. ''' pat = ' \"([%s0-9-_,./?=@~&]*)\":' % string.letters+ \ '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \ '([.:?;] )' p_reg = re.compile(pat,re.M) aStructuredString = p_reg.sub(r'\1\3 ' , aStructuredString) pat = ' \"([%s0-9-_,./?=@~&]*)\", ' % string.letters+ \ '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \ '([.:?;] )' p_reg = re.compile(pat,re.M) aStructuredString = p_reg.sub(r'\1\3 ' , aStructuredString) protoless = aStructuredString.find('\2\3',s) s=under.sub( r'\1\2\3',s) s=code.sub( r'\1\2\3',s) s=em.sub( r'\1\2\3',s) return s class HTML(StructuredText): '''\ An HTML structured text formatter. '''\ def __str__(self, extra_dl=re.compile("\n
"), extra_ul=re.compile("\n