| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 try:
26 from lxml import etree
27 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
28 getXMLspace, setXMLspace, namespaced
29 except ImportError, e:
30 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
31
32 from translate.storage import base
33 from translate.lang import data
34
35
37 """
38 A single unit in the file. Provisional work is done to make several
39 languages possible.
40 """
41
42 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
43 rootNode = ""
44 # The name of the per language element of this unit type:(termEntry, tu,
45 # trans-unit)
46 languageNode = ""
47 #The name of the innermost element of this unit type:(term, seg)
48 textNode = ""
49
50 namespace = None
51 _default_xml_space = "preserve"
52 """The default handling of spacing in the absence of an xml:space
53 attribute.
54
55 This is mostly for correcting XLIFF behaviour."""
56
58 """Constructs a unit containing the given source string"""
59 self._rich_source = None
60 self._rich_target = None
61 if empty:
62 self._state_n = 0
63 return
64 self.xmlelement = etree.Element(self.namespaced(self.rootNode))
65 #add descrip, note, etc.
66 super(LISAunit, self).__init__(source)
67
69 """Compares two units"""
70 if not isinstance(other, LISAunit):
71 return super(LISAunit, self).__eq__(other)
72 languageNodes = self.getlanguageNodes()
73 otherlanguageNodes = other.getlanguageNodes()
74 if len(languageNodes) != len(otherlanguageNodes):
75 return False
76 for i in range(len(languageNodes)):
77 mytext = self.getNodeText(languageNodes[i],
78 getXMLspace(self.xmlelement,
79 self._default_xml_space))
80 othertext = other.getNodeText(otherlanguageNodes[i],
81 getXMLspace(self.xmlelement,
82 self._default_xml_space))
83 if mytext != othertext:
84 #TODO:^ maybe we want to take children and notes into account
85 return False
86 return True
87
89 """Returns name in Clark notation.
90
91 For example namespaced("source") in an XLIFF document might return::
92 {urn:oasis:names:tc:xliff:document:1.1}source
93 This is needed throughout lxml.
94 """
95 return namespaced(self.namespace, name)
96
98 languageNodes = self.getlanguageNodes()
99 if len(languageNodes) > 0:
100 self.xmlelement.replace(languageNodes[0], dom_node)
101 else:
102 self.xmlelement.append(dom_node)
103
106 source_dom = property(get_source_dom, set_source_dom)
107
109 if self._rich_source is not None:
110 self._rich_source = None
111 text = data.forceunicode(text)
112 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
113
115 return self.getNodeText(self.source_dom,
116 getXMLspace(self.xmlelement,
117 self._default_xml_space))
118 source = property(getsource, setsource)
119
121 languageNodes = self.getlanguageNodes()
122 assert len(languageNodes) > 0
123 if dom_node is not None:
124 if append or len(languageNodes) == 0:
125 self.xmlelement.append(dom_node)
126 else:
127 self.xmlelement.insert(1, dom_node)
128 if not append and len(languageNodes) > 1:
129 self.xmlelement.remove(languageNodes[1])
130
132 if lang:
133 return self.getlanguageNode(lang=lang)
134 else:
135 return self.getlanguageNode(lang=None, index=1)
136 target_dom = property(get_target_dom)
137
139 """Sets the "target" string (second language), or alternatively
140 appends to the list"""
141 #XXX: we really need the language - can't really be optional, and we
142 # need to propagate it
143 if self._rich_target is not None:
144 self._rich_target = None
145 text = data.forceunicode(text)
146 # Firstly deal with reinitialising to None or setting to identical
147 # string
148 if self.gettarget() == text:
149 return
150 languageNode = self.get_target_dom(None)
151 if not text is None:
152 if languageNode is None:
153 languageNode = self.createlanguageNode(lang, text, "target")
154 self.set_target_dom(languageNode, append)
155 else:
156 if self.textNode:
157 terms = languageNode.iter(self.namespaced(self.textNode))
158 try:
159 languageNode = terms.next()
160 except StopIteration, e:
161 pass
162 languageNode.text = text
163 else:
164 self.set_target_dom(None, False)
165
167 """retrieves the "target" text (second entry), or the entry in the
168 specified language, if it exists"""
169 return self.getNodeText(self.get_target_dom(lang),
170 getXMLspace(self.xmlelement,
171 self._default_xml_space))
172 target = property(gettarget, settarget)
173
175 """Returns a xml Element setup with given parameters to represent a
176 single language entry. Has to be overridden."""
177 return None
178
180 """Create the text node in parent containing all the ph tags"""
181 matches = _getPhMatches(text)
182 if not matches:
183 parent.text = text
184 return
185
186 # Now we know there will definitely be some ph tags
187 start = matches[0].start()
188 pretext = text[:start]
189 if pretext:
190 parent.text = pretext
191 lasttag = parent
192 for i, m in enumerate(matches):
193 #pretext
194 pretext = text[start:m.start()]
195 # this will never happen with the first ph tag
196 if pretext:
197 lasttag.tail = pretext
198 #ph node
199 phnode = etree.SubElement(parent, self.namespaced("ph"))
200 phnode.set("id", str(i+1))
201 phnode.text = m.group()
202 lasttag = phnode
203 start = m.end()
204 #post text
205 if text[start:]:
206 lasttag.tail = text[start:]
207
209 """Returns a list of all nodes that contain per language information.
210 """
211 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
212
214 """Retrieves a languageNode either by language or by index"""
215 if lang is None and index is None:
216 raise KeyError("No criterea for languageNode given")
217 languageNodes = self.getlanguageNodes()
218 if lang:
219 for set in languageNodes:
220 if getXMLlang(set) == lang:
221 return set
222 else:#have to use index
223 if index >= len(languageNodes):
224 return None
225 else:
226 return languageNodes[index]
227 return None
228
230 """Retrieves the term from the given languageNode"""
231 if languageNode is None:
232 return None
233 if self.textNode:
234 terms = languageNode.iterdescendants(self.namespaced(self.textNode))
235 if terms is None:
236 return None
237 else:
238 return getText(terms.next(), xml_space)
239 else:
240 return getText(languageNode, xml_space)
241
245
248
249 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
250 lambda self, value: self._set_property(self.namespaced('xid'), value))
251
252 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
253 lambda self, value: self._set_property(self.namespaced('rid'), value))
254
259 createfromxmlElement = classmethod(createfromxmlElement)
260
261
263 """A class representing a file store for one of the LISA file formats."""
264 UnitClass = LISAunit
265 #The root node of the XML document:
266 rootNode = ""
267 #The root node of the content section:
268 bodyNode = ""
269 #The XML skeleton to use for empty construction:
270 XMLskeleton = ""
271
272 namespace = None
273
def __init__(self, inputfile=None, sourcelanguage='en',
             targetlanguage=None, unitclass=None):
    """Create a store from existing XML, or a new one from the skeleton.

    :param inputfile: XML input handed straight to ``parse()``; when it
        is ``None`` a new document is built from ``XMLskeleton`` instead.
    :param sourcelanguage: passed to ``setsourcelanguage()``, for a newly
        created document only.
    :param targetlanguage: passed to ``settargetlanguage()``, for a newly
        created document only.
    :param unitclass: forwarded unchanged to the parent constructor.
    :raises AssertionError: if the root tag of a loaded document is not
        ``self.namespaced(self.rootNode)``.
    """
    # NOTE(review): the block nesting below was restored from a listing
    # that had lost its indentation - confirm it against the upstream file.
    super(LISAfile, self).__init__(unitclass=unitclass)
    if inputfile is not None:
        self.parse(inputfile)
        # Check that the loaded document is of the format this class
        # represents.
        assert self.document.getroot().tag == self.namespaced(self.rootNode)
    else:
        # Strip the newlines out of the skeleton so that its whitespace
        # doesn't interfere with lxml's pretty printing.
        self.parse(self.XMLskeleton.replace("\n", ""))
        self.setsourcelanguage(sourcelanguage)
        self.settargetlanguage(targetlanguage)
        self.addheader()
    # Assigned after parse() has run, so this value replaces the encoding
    # that parse() recorded from the document.
    self._encoding = "UTF-8"
288
292
294 """Returns name in Clark notation.
295
296 For example namespaced("source") in an XLIFF document might return::
297 {urn:oasis:names:tc:xliff:document:1.1}source
298 This is needed throughout lxml.
299 """
300 return namespaced(self.namespace, name)
301
303 """Initialises self.body so it never needs to be retrieved from the
304 XML again."""
305 self.namespace = self.document.getroot().nsmap.get(None, None)
306 self.body = self.document.find('//%s' % self.namespaced(self.bodyNode))
307
309 #TODO: maybe this should rather be called addsourcestring or something?
310 """Adds and returns a new unit with the given string as first entry."""
311 newunit = self.UnitClass(source)
312 self.addunit(newunit)
313 return newunit
314
316 unit.namespace = self.namespace
317 super(LISAfile, self).addunit(unit)
318 if new:
319 self.body.append(unit.xmlelement)
320
322 """Converts to a string containing the file's XML"""
323 return etree.tostring(self.document, pretty_print=True,
324 xml_declaration=True, encoding='utf-8')
325
327 """Populates this object from the given xml string"""
328 if not hasattr(self, 'filename'):
329 self.filename = getattr(xml, 'name', '')
330 if hasattr(xml, "read"):
331 xml.seek(0)
332 posrc = xml.read()
333 xml = posrc
334 if etree.LXML_VERSION >= (2, 1, 0):
335 #Since version 2.1.0 we can pass the strip_cdata parameter to
336 #indicate that we don't want cdata to be converted to raw XML
337 parser = etree.XMLParser(strip_cdata=False)
338 else:
339 parser = etree.XMLParser()
340 self.document = etree.fromstring(xml, parser).getroottree()
341 self._encoding = self.document.docinfo.encoding
342 self.initbody()
343 assert self.document.getroot().tag == self.namespaced(self.rootNode)
344 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
345 term = self.UnitClass.createfromxmlElement(entry)
346 self.addunit(term, new=False)
347
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Apr 12 18:12:02 2011 | http://epydoc.sourceforge.net |