_xmlgen.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. """
  2. module for generating and serializing xml and html structures
  3. by using simple python objects.
  4. (c) holger krekel, holger at merlinux eu. 2009
  5. """
  6. import sys, re
  7. if sys.version_info >= (3,0):
  8. def u(s):
  9. return s
  10. def unicode(x, errors=None):
  11. if hasattr(x, '__unicode__'):
  12. return x.__unicode__()
  13. return str(x)
  14. else:
  15. def u(s):
  16. return unicode(s)
  17. unicode = unicode
  18. class NamespaceMetaclass(type):
  19. def __getattr__(self, name):
  20. if name[:1] == '_':
  21. raise AttributeError(name)
  22. if self == Namespace:
  23. raise ValueError("Namespace class is abstract")
  24. tagspec = self.__tagspec__
  25. if tagspec is not None and name not in tagspec:
  26. raise AttributeError(name)
  27. classattr = {}
  28. if self.__stickyname__:
  29. classattr['xmlname'] = name
  30. cls = type(name, (self.__tagclass__,), classattr)
  31. setattr(self, name, cls)
  32. return cls
  33. class Tag(list):
  34. class Attr(object):
  35. def __init__(self, **kwargs):
  36. self.__dict__.update(kwargs)
  37. def __init__(self, *args, **kwargs):
  38. super(Tag, self).__init__(args)
  39. self.attr = self.Attr(**kwargs)
  40. def __unicode__(self):
  41. return self.unicode(indent=0)
  42. __str__ = __unicode__
  43. def unicode(self, indent=2):
  44. l = []
  45. SimpleUnicodeVisitor(l.append, indent).visit(self)
  46. return u("").join(l)
  47. def __repr__(self):
  48. name = self.__class__.__name__
  49. return "<%r tag object %d>" % (name, id(self))
  50. Namespace = NamespaceMetaclass('Namespace', (object, ), {
  51. '__tagspec__': None,
  52. '__tagclass__': Tag,
  53. '__stickyname__': False,
  54. })
  55. class HtmlTag(Tag):
  56. def unicode(self, indent=2):
  57. l = []
  58. HtmlVisitor(l.append, indent, shortempty=False).visit(self)
  59. return u("").join(l)
  60. # exported plain html namespace
  61. class html(Namespace):
  62. __tagclass__ = HtmlTag
  63. __stickyname__ = True
  64. __tagspec__ = dict([(x,1) for x in (
  65. 'a,abbr,acronym,address,applet,area,article,aside,audio,b,'
  66. 'base,basefont,bdi,bdo,big,blink,blockquote,body,br,button,'
  67. 'canvas,caption,center,cite,code,col,colgroup,command,comment,'
  68. 'datalist,dd,del,details,dfn,dir,div,dl,dt,em,embed,'
  69. 'fieldset,figcaption,figure,footer,font,form,frame,frameset,h1,'
  70. 'h2,h3,h4,h5,h6,head,header,hgroup,hr,html,i,iframe,img,input,'
  71. 'ins,isindex,kbd,keygen,label,legend,li,link,listing,map,mark,'
  72. 'marquee,menu,meta,meter,multicol,nav,nobr,noembed,noframes,'
  73. 'noscript,object,ol,optgroup,option,output,p,param,pre,progress,'
  74. 'q,rp,rt,ruby,s,samp,script,section,select,small,source,span,'
  75. 'strike,strong,style,sub,summary,sup,table,tbody,td,textarea,'
  76. 'tfoot,th,thead,time,title,tr,track,tt,u,ul,xmp,var,video,wbr'
  77. ).split(',') if x])
  78. class Style(object):
  79. def __init__(self, **kw):
  80. for x, y in kw.items():
  81. x = x.replace('_', '-')
  82. setattr(self, x, y)
  83. class raw(object):
  84. """just a box that can contain a unicode string that will be
  85. included directly in the output"""
  86. def __init__(self, uniobj):
  87. self.uniobj = uniobj
  88. class SimpleUnicodeVisitor(object):
  89. """ recursive visitor to write unicode. """
  90. def __init__(self, write, indent=0, curindent=0, shortempty=True):
  91. self.write = write
  92. self.cache = {}
  93. self.visited = {} # for detection of recursion
  94. self.indent = indent
  95. self.curindent = curindent
  96. self.parents = []
  97. self.shortempty = shortempty # short empty tags or not
  98. def visit(self, node):
  99. """ dispatcher on node's class/bases name. """
  100. cls = node.__class__
  101. try:
  102. visitmethod = self.cache[cls]
  103. except KeyError:
  104. for subclass in cls.__mro__:
  105. visitmethod = getattr(self, subclass.__name__, None)
  106. if visitmethod is not None:
  107. break
  108. else:
  109. visitmethod = self.__object
  110. self.cache[cls] = visitmethod
  111. visitmethod(node)
  112. # the default fallback handler is marked private
  113. # to avoid clashes with the tag name object
  114. def __object(self, obj):
  115. #self.write(obj)
  116. self.write(escape(unicode(obj)))
  117. def raw(self, obj):
  118. self.write(obj.uniobj)
  119. def list(self, obj):
  120. assert id(obj) not in self.visited
  121. self.visited[id(obj)] = 1
  122. for elem in obj:
  123. self.visit(elem)
  124. def Tag(self, tag):
  125. assert id(tag) not in self.visited
  126. try:
  127. tag.parent = self.parents[-1]
  128. except IndexError:
  129. tag.parent = None
  130. self.visited[id(tag)] = 1
  131. tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
  132. if self.curindent and not self._isinline(tagname):
  133. self.write("\n" + u(' ') * self.curindent)
  134. if tag:
  135. self.curindent += self.indent
  136. self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
  137. self.parents.append(tag)
  138. for x in tag:
  139. self.visit(x)
  140. self.parents.pop()
  141. self.write(u('</%s>') % tagname)
  142. self.curindent -= self.indent
  143. else:
  144. nameattr = tagname+self.attributes(tag)
  145. if self._issingleton(tagname):
  146. self.write(u('<%s/>') % (nameattr,))
  147. else:
  148. self.write(u('<%s></%s>') % (nameattr, tagname))
  149. def attributes(self, tag):
  150. # serialize attributes
  151. attrlist = dir(tag.attr)
  152. attrlist.sort()
  153. l = []
  154. for name in attrlist:
  155. res = self.repr_attribute(tag.attr, name)
  156. if res is not None:
  157. l.append(res)
  158. l.extend(self.getstyle(tag))
  159. return u("").join(l)
  160. def repr_attribute(self, attrs, name):
  161. if name[:2] != '__':
  162. value = getattr(attrs, name)
  163. if name.endswith('_'):
  164. name = name[:-1]
  165. if isinstance(value, raw):
  166. insert = value.uniobj
  167. else:
  168. insert = escape(unicode(value))
  169. return ' %s="%s"' % (name, insert)
  170. def getstyle(self, tag):
  171. """ return attribute list suitable for styling. """
  172. try:
  173. styledict = tag.style.__dict__
  174. except AttributeError:
  175. return []
  176. else:
  177. stylelist = [x+': ' + y for x,y in styledict.items()]
  178. return [u(' style="%s"') % u('; ').join(stylelist)]
  179. def _issingleton(self, tagname):
  180. """can (and will) be overridden in subclasses"""
  181. return self.shortempty
  182. def _isinline(self, tagname):
  183. """can (and will) be overridden in subclasses"""
  184. return False
  185. class HtmlVisitor(SimpleUnicodeVisitor):
  186. single = dict([(x, 1) for x in
  187. ('br,img,area,param,col,hr,meta,link,base,'
  188. 'input,frame').split(',')])
  189. inline = dict([(x, 1) for x in
  190. ('a abbr acronym b basefont bdo big br cite code dfn em font '
  191. 'i img input kbd label q s samp select small span strike '
  192. 'strong sub sup textarea tt u var'.split(' '))])
  193. def repr_attribute(self, attrs, name):
  194. if name == 'class_':
  195. value = getattr(attrs, name)
  196. if value is None:
  197. return
  198. return super(HtmlVisitor, self).repr_attribute(attrs, name)
  199. def _issingleton(self, tagname):
  200. return tagname in self.single
  201. def _isinline(self, tagname):
  202. return tagname in self.inline
  203. class _escape:
  204. def __init__(self):
  205. self.escape = {
  206. u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'),
  207. u('&') : u('&amp;'), u("'") : u('&apos;'),
  208. }
  209. self.charef_rex = re.compile(u("|").join(self.escape.keys()))
  210. def _replacer(self, match):
  211. return self.escape[match.group(0)]
  212. def __call__(self, ustring):
  213. """ xml-escape the given unicode string. """
  214. try:
  215. ustring = unicode(ustring)
  216. except UnicodeDecodeError:
  217. ustring = unicode(ustring, 'utf-8', errors='replace')
  218. return self.charef_rex.sub(self._replacer, ustring)
  219. escape = _escape()