| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This module represents the Khmer language.
23
24 For more information, see U{http://en.wikipedia.org/wiki/Khmer_language}
25 """
26
27 import re
28
29 from translate.lang import common
30
31
33 """This class represents Khmer."""
34
35 khmerpunc = u"។៕៖៘"
36 """These marks are only used for Khmer."""
37
38 punctuation = u"".join([common.Common.commonpunc, common.Common.quotes,
39 common.Common.miscpunc, khmerpunc])
40
41 sentenceend = u"!?…។៕៘"
42
43 sentencere = re.compile(r"""(?s) #make . also match newlines
44 .*? #anything, but match non-greedy
45 [%s] #the punctuation for sentence ending
46 \s+ #the spacing after the punctuation
47 (?=[^a-z\d])#lookahead: next part must not start with a lowercase ASCII letter or digit (Khmer has no caps)
48 """ % sentenceend, re.VERBOSE)
49 #\u00a0 is non-breaking space
50 puncdict = {
51 u".": u"\u00a0។",
52 u":": u"\u00a0៖",
53 u"!": u"\u00a0!",
54 u"?": u"\u00a0?",
55 }
56
57 ignoretests = ["startcaps", "simplecaps"]
58
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Apr 12 18:12:07 2011 | http://epydoc.sourceforge.net |