CYCLUS
hdf5_back_gen.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 """This module generates HDF5 backend code found in src/hdf5_back.cc
3 
4 There are 8 distinct code generation options, one of which must be passed
5 as an argument to this module. They are CREATE, QUERY, VL_DATASET,
6 FILL_BUF, WRITE, VAL_TO_BUF_H, VAL_TO_BUF, and BUF_TO_VAL. Each of these
7 generates a different section of Hdf5 backend code. All are invoked by
8 src/CMakeLists.txt prior to C++ compilation. However, for debugging purposes,
9 each section can be printed individually by passing that section's identifier
10 as a command line argument. The entry point for each of these generation
11 routines is the function main_XXXX, where XXXX is a generation option.
12 
13 Example
14 -------
15 To generate the code found in src/hdf5_back.cc::Query, use
16 
17  $ python hdf5_back_gen.py QUERY
18 
19 """
20 import os
21 import sys
22 import json
23 from pprint import pformat
24 from itertools import chain
25 from collections import OrderedDict
26 
# Dispatch table for the generation entry points; empty at this point.
# NOTE(review): presumably maps option names (CREATE, QUERY, ...) to the
# main_XXXX functions mentioned in the module docstring -- confirm.
MAIN_DISPATCH = {}

# Type lookup tables.  All of them start out empty here; they are read by the
# generator functions below (e.g. CANON_TO_NODE and DB_TO_VL in get_setup).
CANON_TO_NODE = {}
CANON_TYPES = []
DB_TO_CPP = {}
CANON_TO_DB = {}
DB_TO_VL = {}
# NOTE(review): whitespace runs look collapsed in this listing; confirm the
# intended width of this indent string against the original file.
INDENT = ' '

NOT_VL = []
VARIATION_DICT = OrderedDict()
ORIGIN_DICT = OrderedDict()
ORIGIN_TO_VL = {}


def is_primitive(t):
    """Return True when the canonical form of ``t`` is a plain string.

    Parameters
    ----------
    t : Type
        Any object exposing a ``canon`` attribute.

    Returns
    -------
    bool
        Whether ``t.canon`` is an instance of ``str``.
    """
    return isinstance(t.canon, str)
42 
class Node(object):
    """Base class for the nodes of the code-generation tree.

    Subclasses list their attribute names in ``fields``.  Every name in
    ``fields`` becomes an instance attribute: it takes the value of the
    matching keyword argument, or ``None`` when no such argument is given.
    """

    # Names of the attributes an instance carries; overridden by subclasses.
    fields = ()

    def __init__(self, **kwargs):
        """Store keyword arguments as attributes.

        Raises
        ------
        RuntimeError
            If a keyword argument is not named in ``fields``.
        """
        for field in kwargs:
            if field not in self.fields:
                # Carry the diagnostic in the exception rather than printing
                # it, so it cannot end up mixed into text written to stdout.
                raise RuntimeError("{0} is not a valid field".format(field))
        for field in self.fields:
            setattr(self, field, kwargs.get(field))

    def __str__(self):
        """Return the PrettyFormatter rendering of this node."""
        return PrettyFormatter(self).visit()
60 
# Concrete node types.  Each class only declares the attribute names it
# carries (see Node.__init__); the visitors give them meaning.

class Block(Node):
    # A sequence of child nodes.
    fields = ("nodes",)


class Var(Node):
    fields = ("name",)


class Type(Node):
    # C++ spelling, database type name and canonical form of a type.
    fields = ("cpp", "db", "canon")


class Decl(Node):
    fields = ("type", "name")


class Expr(Node):
    fields = ("value",)


class ExprStmt(Node):
    fields = ("child",)


class Line(Node):
    fields = ("child",)


class Assign(Node):
    fields = ("target", "value")


# The three class statements below (DeclAssign, LeftUnaryOp, RightUnaryOp)
# were missing from the listing and are restored from how the names are used
# elsewhere in this file (DeclAssign(...) and LeftUnaryOp(...) calls, and the
# visit_declassign / visit_leftunaryop / visit_rightunaryop visitor methods).
class DeclAssign(Node):
    fields = ("type", "target", "value")


class Case(Node):
    fields = ("cond", "body")


class If(Node):
    fields = ("cond", "body", "elifs", "el")


class For(Node):
    fields = ("adecl", "cond", "incr", "body")


class BinOp(Node):
    fields = ("x", "op", "y")


class LeftUnaryOp(Node):
    fields = ("op", "name")


# NOTE(review): capitalisation inferred by analogy with LeftUnaryOp; the
# visitor lookup itself only depends on the lower-cased class name.
class RightUnaryOp(Node):
    fields = ("name", "op")


class FuncCall(Node):
    # targs means template args
    fields = ("name", "args", "targs")


class FuncDef(Node):
    fields = ("type", "name", "args", "targs", "body", "tspecial")


class Raw(Node):
    # for cheating and literals
    fields = ("code",)


class Nothing(Node):
    # for "nothing"
    fields = ()
120 
121 _lowername = lambda cls: cls.__name__.lower()
122 
123 class Visitor(object):
124  """Super-class for all classes that should walk over a tree of nodes.
125  This implements the visit() method.
126  """
127 
128  def __init__(self, tree=None):
129  self.tree = tree
130 
131  def visit(self, node=None):
132  """Walks over a node. If no node is provided, the tree is used."""
133  if node is None:
134  node = self.tree
135  if node is None:
136  raise RuntimeError('no node or tree given!')
137  for clsname in map(_lowername, type.mro(node.__class__)):
138  meth = getattr(self, 'visit_' + clsname, None)
139  if callable(meth):
140  rtn = meth(node)
141  break
142  else:
143  msg = 'could not find valid visitor method for {0} on {1}'
144  nodename = node.__class__.__name__
145  selfname = self.__class__.__name__
146  msg = msg.format(nodename, selfname)
147  try:
148  msg += "\n"
149  msg += str(node)
150  except Exception:
151  pass
152  raise AttributeError(msg)
153  return rtn
154 
# NOTE(review): the class statement was missing from the listing; the name is
# taken from the super() call below and the Visitor base from the use of
# self.visit() and the tree= argument -- confirm against the original file.
class PrettyFormatter(Visitor):
    """Formats a tree of nodes into a pretty string"""

    # NOTE(review): whitespace runs look collapsed in this listing; confirm
    # the intended default for ``indent``.
    def __init__(self, tree=None, indent=' '):
        super(PrettyFormatter, self).__init__(tree=tree)
        self.level = 0
        self.indent = indent

    def visit_node(self, node):
        """Render ``node`` as ``ClassName(field=value, ...)``.

        Node-valued fields are rendered recursively, every other value goes
        through ``pformat``.  A node without fields renders as
        ``ClassName()``.
        """
        name = node.__class__.__name__
        if len(node.fields) == 0:
            return name + '()'

        self.level += 1
        entries = []
        for field in node.fields:
            value = getattr(node, field)
            if isinstance(value, Node):
                text = self.visit(value)
            else:
                text = pformat(value)
            entries.append('{0}={1}'.format(field, text))
        # ``indent`` here is the module-level helper function, not the
        # constructor argument of the same name.
        body = indent(',\n'.join(entries), self.indent)
        self.level -= 1
        return name + '(\n' + body + '\n)'
178 
# NOTE(review): the class statement was missing from the listing; the name is
# taken from the super() call below and the Visitor base from the use of
# self.visit() and the tree= argument -- confirm against the original file.
class CppGen(Visitor):
    """Visitor that renders a tree of nodes as C++ source text.

    Each visit_* method returns the text for one node type.
    """

    # NOTE(review): whitespace runs look collapsed in this listing; confirm
    # the intended default for ``indent``.
    def __init__(self, tree=None, indent=' ', debug=False):
        super(CppGen, self).__init__(tree=tree)
        self.level = 0
        self.indent = indent
        # When true, every expression statement is preceded by a std::cout
        # line echoing the statement.
        self.debug = debug

    def _indented(self, nodes):
        """Visit each node in turn and concatenate the indented results."""
        return "".join(indent(self.visit(n), self.indent) for n in nodes)

    def visit_var(self, node):
        return node.name

    def visit_type(self, node):
        return node.cpp

    def visit_decl(self, node):
        return self.visit(node.type) + " " + self.visit(node.name)

    def visit_exprstmt(self, node):
        s = self.visit(node.child) + ";"
        if self.debug:
            # Echo the statement itself, with its double quotes escaped.
            s = ("std::cout << \"HDF5_DEBUG: " + s.replace('"', '\\"')
                 + "\" << std::endl;\n" + s)
        return s + '\n'

    def visit_assign(self, node):
        return self.visit(node.target) + "=" + self.visit(node.value)

    def visit_declassign(self, node):
        s = self.visit(node.type) + " "
        s += self.visit(node.target) + "="
        s += self.visit(node.value)
        return s

    def visit_binop(self, node):
        # No spaces are emitted around the operator.
        return self.visit(node.x) + node.op + self.visit(node.y)

    def visit_leftunaryop(self, node):
        return node.op + self.visit(node.name)

    def visit_rightunaryop(self, node):
        return self.visit(node.name) + node.op

    def visit_raw(self, node):
        return node.code

    def visit_case(self, node):
        s = "case " + self.visit(node.cond) + ": {\n"
        s += self._indented(node.body)
        s += "\n}\n"
        return s

    def visit_if(self, node):
        s = "if(" + self.visit(node.cond) + "){\n"
        s += self._indented(node.body)
        s += "\n}"
        # elifs may be None or an empty list; both mean "no else-if".
        if node.elifs:
            for cond, body in node.elifs:
                s += "else if(" + self.visit(cond) + "){\n"
                s += self._indented(body)
                s += "\n}"
        # el holds a single node, not a list.
        if node.el is not None:
            s += "else{\n"
            s += indent(self.visit(node.el), self.indent)
            s += "\n}"
        return s + "\n"

    def visit_for(self, node):
        s = "for("
        if node.adecl is not None:
            s += self.visit(node.adecl)
        s += ";"
        s += self.visit(node.cond) + ";"
        s += self.visit(node.incr) + "){\n"
        s += self._indented(node.body)
        s += "\n}\n"
        return s

    def visit_funccall(self, node):
        s = self.visit(node.name)
        if node.targs is not None:
            s += "<" + ",".join(self.visit(t) for t in node.targs) + ">"
        s += "(" + ", ".join(self.visit(a) for a in node.args) + ")"
        return s

    def visit_funcdef(self, node):
        """Render a function definition.

        With ``tspecial`` set the definition is emitted as an explicit
        specialisation (``template<>``) and the template arguments stay on
        the function name.  Otherwise any template arguments go into a
        ``template<...>`` header and are left off the name.
        """
        s = ""
        call_targs = node.targs
        if node.tspecial:
            s += "template<>\n"
        elif node.targs is not None:
            s += "template<"
            s += ",".join(self.visit(t) for t in node.targs)
            s += ">\n"
            # The header already carries the template arguments.  A local is
            # used so the node is left untouched and renders the same way if
            # it is visited again.
            call_targs = None
        s += self.visit(node.type)
        s += " "
        s += self.visit(FuncCall(name=node.name, args=node.args,
                                 targs=call_targs))
        s += " {\n"
        s += indent(self.visit(Block(nodes=node.body)), self.indent)
        s += "}\n"
        return s

    def visit_nothing(self, node):
        return ""

    def visit_block(self, node):
        return "".join(self.visit(n) for n in node.nodes)
339 
def resolve_unicode(item):
    """Translate unicode types into string types, if necessary.

    This function exists to support Python 2.7.

    Parameters
    ----------
    item : int or str or list
        The list of items, or item to potentially encode.  Tuples and lists
        are processed element by element, recursively.

    Returns
    -------
    int or str or list
        The same type as was passed to the function, encoded if necessary
    """
    if isinstance(item, str):
        return item
    if isinstance(item, tuple):
        return tuple(resolve_unicode(member) for member in item)
    if isinstance(item, list):
        return [resolve_unicode(member) for member in item]
    try:
        return item.encode('utf-8')
    except Exception:
        # Anything that cannot be encoded is handed back unchanged.
        return item
367 
def convert_canonical(raw_list):
    """Converts JSON list of lists to tuple of tuples.

    Parameters
    ----------
    raw_list : list or str
        List to be converted, or str

    Returns
    -------
    str or tuple
        A str is returned as is; anything else is converted, recursively,
        into a tuple.
    """
    if not isinstance(raw_list, str):
        return tuple(map(convert_canonical, raw_list))
    return raw_list
384 
def list_dependencies(canon):
    """Return a list of a type's dependencies, each in canonical form.

    Parameters
    ----------
    canon : tuple or str
        the canonical form of the type

    Returns
    -------
    list or str
        For a str the value itself is returned.  Otherwise the result is a
        list holding ``canon`` followed by each of its elements after the
        first.

    Examples
    --------
    >>> list_dependencies(('PAIR', 'INT', 'VL_STRING'))
    [('PAIR', 'INT', 'VL_STRING'), 'INT', 'VL_STRING']
    """
    if isinstance(canon, str):
        return canon

    dependencies = [canon]
    dependencies.extend(canon[1:])
    return dependencies
407 
def get_variable(name, depth=0, prefix=""):
    """Return a C++ variable, appropriately formatted for depth.

    The result is ``name`` followed by the depth and then the prefix.

    Parameters
    ----------
    name : str
        Base variable name.
    depth : int, optional
        Depth of variable in relation to depth 0 type.
    prefix : str, optional
        New prefix to add, based on direct parent type.

    Returns
    -------
    str
        Variable name.
    """
    pieces = (name, str(depth), prefix)
    return "".join(pieces)
427 
def get_prefix(base_prefix, parent_type, child_index):
    """Return the prefix of a C++ variable, appropriately formatted for depth.

    The part appended to ``base_prefix`` is looked up in ``template_args``
    using the first element of the parent's canonical form.

    Parameters
    ----------
    base_prefix : str
        Prefix of direct parent type.
    parent_type : Type
        Node of parent type.
    child_index : int
        Index relative to direct parent.

    Returns
    -------
    str
        New prefix.
    """
    container = parent_type.canon[0]
    child_part = template_args[container][child_index]
    return base_prefix + child_part
446 
def case_template(t, read_x):
    """Represents C++ case statement.

    The case label is ``t.db`` and a ``break`` statement is added after the
    given body.

    Parameters
    ----------
    t : Type
        Depth 0 type.
    read_x : Node
        Nodes of case statement body.  A Block contributes its child nodes,
        any other node is used as the single body statement.

    Returns
    -------
    Node
        Complete case statement block.
    """
    if isinstance(read_x, Block):
        # Copy the list: appending to read_x.nodes directly would also add
        # the ``break`` to the caller's Block.
        body = list(read_x.nodes)
    else:
        body = [read_x]
    body.append(ExprStmt(child=Var(name="break")))
    return Case(cond=Var(name=t.db), body=body)
469 
def primitive_setup(t, depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ primitives.

    No setup is generated; the arguments are accepted but not used.
    """
    return Nothing()
474 
def string_setup(depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ String.

    Declares a ``size_t`` variable named ``nullpos<depth><prefix>``.
    """
    nullpos_var = Var(name="nullpos" + str(depth) + prefix)
    declaration = Decl(type=Type(cpp="size_t"), name=nullpos_var)
    return Block(nodes=[ExprStmt(child=declaration)])
482 
def vl_string_setup(depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ VL_String.

    Returns a Block whose only child is a Nothing node; the arguments are
    accepted but not used.
    """
    return Block(nodes=[Nothing()])
488 
489 
# Name parts for the children of each container type, in child order.  The
# parts are appended to a parent's prefix to form child variable prefixes.
template_args = {
    "MAP": ("key", "val"),
    "VECTOR": ("elem",),
    "SET": ("elem",),
    "LIST": ("elem",),
    "PAIR": ("first", "second"),
}

# Container types for which get_setup emits field length and item type
# variables.
variable_length_types = ["MAP", "LIST", "SET", "VECTOR"]
496 
497 
def get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j'):
    """HDF5 Query: Get nodes representing C++ setup.

    Primitive setups are called directly, while template types are handled
    recursively.  Names of the hid_t variables created here are pushed onto
    TEARDOWN_STACK.

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Current prefix, determined by parent type.
    HDF5_type : str
        hid_t type used to access HDF5 methods
    child_index : str or int
        Index into parent type, None if only child

    Returns
    -------
    Block
        Nodes required for type t setup.
    """
    has_parent_index = child_index is not None
    setup_nodes = []

    if has_parent_index:
        # hid_t fieldtype = H5Tget_member_type(<parent type>, <index>)
        field_type_var = get_variable("fieldtype", depth=depth, prefix=prefix)
        member_call = FuncCall(name=Raw(code="H5Tget_member_type"),
                               args=[Raw(code=HDF5_type),
                                     Raw(code=str(child_index))])
        field_type = ExprStmt(child=DeclAssign(type=Type(cpp="hid_t"),
                                               target=Var(name=field_type_var),
                                               value=member_call))
        # From here on sizes are queried on the member type.
        HDF5_type = field_type_var

    # unsigned int total_size = H5Tget_size(<type>)
    total_size_var = get_variable("total_size", depth=depth, prefix=prefix)
    size_call = FuncCall(name=Raw(code="H5Tget_size"),
                         args=[Raw(code=HDF5_type)])
    total_size = ExprStmt(child=DeclAssign(type=Type(cpp="unsigned int"),
                                           target=Var(name=total_size_var),
                                           value=size_call))

    if is_primitive(t):
        if t.canon == "STRING":
            setup_nodes.append(string_setup(depth=depth, prefix=prefix))
        elif t.canon == "VL_STRING":
            setup_nodes.append(vl_string_setup(depth=depth, prefix=prefix))
        else:
            setup_nodes.append(primitive_setup(t, depth=depth, prefix=prefix))
        if has_parent_index:
            setup_nodes.append(field_type)
            TEARDOWN_STACK.append(field_type_var)
        setup_nodes.append(total_size)
        return Block(nodes=setup_nodes)

    if DB_TO_VL[t.db]:
        # For these types only a fixed-size total_size declaration is
        # emitted; no member type is opened.
        fixed_size = DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=total_size_var),
                                value=Raw(code="CYCLUS_SHA1_SIZE"))
        setup_nodes.append(ExprStmt(child=fixed_size))
        return Block(nodes=setup_nodes)

    container = t.canon[0]
    child_types = t.canon[1:]

    if has_parent_index:
        setup_nodes.append(field_type)
        TEARDOWN_STACK.append(field_type_var)
    setup_nodes.append(total_size)

    # Children receive their position as child_index, except for the single
    # child of a variable-length container, which receives None.
    index_children = True
    if container in variable_length_types:
        fieldlen_var = get_variable("fieldlen", depth=depth, prefix=prefix)
        fieldlen_decl = ExprStmt(child=Decl(type=Type(cpp="hsize_t"),
                                            name=Var(name=fieldlen_var)))
        fieldlen_fill = ExprStmt(child=FuncCall(
            name=Raw(code="H5Tget_array_dims2"),
            args=[Raw(code=HDF5_type), Raw(code="&"+fieldlen_var)]))
        setup_nodes.append(Block(nodes=[fieldlen_decl, fieldlen_fill]))

        item_type_var = get_variable("item_type", depth=depth, prefix=prefix)
        item_type = ExprStmt(child=DeclAssign(
            type=Type(cpp="hid_t"),
            target=Var(name=item_type_var),
            value=FuncCall(name=Raw(code="H5Tget_super"),
                           args=[Raw(code=HDF5_type)])))
        setup_nodes.append(item_type)
        TEARDOWN_STACK.append(item_type_var)
        # Children are set up against the item type.
        HDF5_type = item_type_var
        index_children = len(child_types) > 1

    child_setups = []
    pairs = zip(child_types, template_args[container])
    for index, (new_type, part) in enumerate(pairs):
        child_setups.append(get_setup(
            CANON_TO_NODE[new_type],
            depth=depth+1,
            prefix=prefix+part,
            HDF5_type=HDF5_type,
            child_index=index if index_children else None))
    setup_nodes.append(Block(nodes=child_setups))
    return Block(nodes=setup_nodes)
627 
def get_decl(t, depth=0, prefix=""):
    """HDF5 Query: Get node representing C++ type declaration.

    Declarations occur directly before bodies, created without recursion.
    The declared variable is named ``x<depth><prefix>``.

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Prefix determined by parent type.

    Returns
    -------
    Node
        Declaration statement as a node.
    """
    declared = Var(name=get_variable("x", depth=depth, prefix=prefix))
    return ExprStmt(child=Decl(type=t, name=declared))
650 
def reinterpret_cast_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents a body using the reinterpret_cast method.

    This includes int, double, float, etc.  The generated statement assigns
    ``*reinterpret_cast<T*>(base_offset)`` to the ``x`` variable.
    """
    target = Var(name=get_variable("x", depth=depth, prefix=prefix))
    cast = FuncCall(name=Raw(code="*reinterpret_cast"),
                    targs=[Raw(code=t.cpp+"*")],
                    args=[Raw(code=base_offset)])
    return Block(nodes=[ExprStmt(child=Assign(target=target, value=cast))])
664 
def string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None):
    """HDF5 Query: Represents body for the C++ String primitive.

    Three statements are generated: construct the string from
    ``total_size`` bytes at ``base_offset``, find the first NUL character,
    and resize the string to that position when one was found.  When
    ``variable`` is None the target is the ``x`` variable for this depth
    and prefix.
    """
    if variable is None:
        variable = get_variable("x", depth=depth, prefix=prefix)
    nullpos = get_variable("nullpos", depth=depth, prefix=prefix)
    total_size = get_variable("total_size", depth=depth, prefix=prefix)

    construct = ExprStmt(child=Assign(
        target=Var(name=variable),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=base_offset), Raw(code=total_size)])))

    find_call = FuncCall(name=Raw(code="find"), args=[Raw(code="'\\0'")])
    locate_null = ExprStmt(child=Assign(
        target=Var(name=nullpos),
        value=BinOp(x=Var(name=variable), op=".", y=find_call)))

    npos = BinOp(x=Raw(code=t.cpp), op="::", y=Raw(code="npos"))
    resize_call = FuncCall(name=Raw(code="resize"), args=[Raw(code=nullpos)])
    truncate = If(
        cond=BinOp(x=Var(name=nullpos), op="!=", y=npos),
        body=[ExprStmt(child=BinOp(x=Var(name=variable), op=".",
                                   y=resize_call))])

    return Block(nodes=[construct, locate_null, truncate])
691 
def vl_string_body(t, depth=0, prefix="", base_offset="buf+offset",
                   variable=None):
    """HDF5 Query: Represents the body for the VL_String primitive.

    The generated statement assigns ``VLRead<cpp, db>(base_offset)`` to the
    target; when ``variable`` is None the target is the ``x`` variable for
    this depth and prefix.
    """
    if variable is None:
        variable = get_variable("x", depth=depth, prefix=prefix)

    read_call = FuncCall(name=Raw(code="VLRead"),
                         args=[Raw(code=base_offset)],
                         targs=[Raw(code=t.cpp), Raw(code=t.db)])
    statement = ExprStmt(child=Assign(target=Var(name=variable),
                                      value=read_call))
    return Block(nodes=[statement])
705 
def uuid_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for the boost::uuid primitive.

    The generated statement is ``memcpy(&x, base_offset, total_size)``.
    """
    destination = "&" + get_variable("x", depth=depth, prefix=prefix)
    byte_count = get_variable("total_size", depth=depth, prefix=prefix)

    copy_call = FuncCall(name=Raw(code="memcpy"),
                         args=[Raw(code=destination),
                               Raw(code=base_offset),
                               Raw(code=byte_count)])
    return Block(nodes=[ExprStmt(child=copy_call)])
717 
def vl_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for all C++ VL types.

    The generated statement assigns ``VLRead<cpp, db>(base_offset)`` to the
    ``x`` variable for this depth and prefix.
    """
    target = Var(name=get_variable("x", depth=depth, prefix=prefix))
    read_call = FuncCall(name=Var(name="VLRead"),
                         args=[Raw(code=base_offset)],
                         targs=[Raw(code=t.cpp), Raw(code=t.db)])
    return Block(nodes=[ExprStmt(child=Assign(target=target,
                                              value=read_call))])
727 
def map_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for C++ map type.

    A loop over ``fieldlen`` items is generated.  Each iteration reads the
    key and then the value (the value sits ``key size`` bytes after the
    key) and stores the pair with ``x[key] = value``.
    """
    container = t.canon[0]
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)

    key = CANON_TO_NODE[t.canon[1]]
    value = CANON_TO_NODE[t.canon[2]]

    key_prefix = prefix + template_args[container][0]
    value_prefix = prefix + template_args[container][1]
    key_name = get_variable("x", depth=depth+1, prefix=key_prefix)
    value_name = get_variable("x", depth=depth+1, prefix=value_prefix)
    key_size = get_variable("total_size", depth=depth+1, prefix=key_prefix)
    value_size = get_variable("total_size", depth=depth+1,
                              prefix=value_prefix)

    # Item i starts at base + (key_size + value_size) * i.
    item_size = "(" + key_size + "+" + value_size + ")"
    key_offset = base_offset + "+" + item_size + "*" + k
    value_offset = key_offset + "+" + key_size

    loop_body = [
        get_body(key, depth=depth+1, prefix=key_prefix,
                 base_offset=key_offset),
        get_body(value, depth=depth+1, prefix=value_prefix,
                 base_offset=value_offset),
        ExprStmt(child=Assign(target=Raw(code=x+"["+key_name+"]"),
                              value=Raw(code=value_name))),
    ]
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=loop_body)
    return Block(nodes=[loop])
764 
def pair_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body for C++ pair type.

    The first item is read at ``base_offset``, the second one directly
    after it (offset by the first item's size), and both are combined with
    ``std::make_pair``.
    """
    container = t.canon[0]
    x = get_variable("x", depth=depth, prefix=prefix)

    first = CANON_TO_NODE[t.canon[1]]
    second = CANON_TO_NODE[t.canon[2]]

    first_prefix = prefix + template_args[container][0]
    second_prefix = prefix + template_args[container][1]
    first_name = get_variable("x", depth=depth+1, prefix=first_prefix)
    second_name = get_variable("x", depth=depth+1, prefix=second_prefix)
    first_size = get_variable("total_size", depth=depth+1,
                              prefix=first_prefix)

    second_offset = base_offset + "+" + first_size

    read_first = get_body(first, depth=depth+1, prefix=first_prefix,
                          base_offset=base_offset)
    read_second = get_body(second, depth=depth+1, prefix=second_prefix,
                           base_offset=second_offset)
    make_pair = FuncCall(name=Raw(code="std::make_pair"),
                         args=[Raw(code=first_name), Raw(code=second_name)])
    combine = ExprStmt(child=Assign(target=Raw(code=x), value=make_pair))
    return Block(nodes=[read_first, read_second, combine])
792 
def vector_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<primitive> types.

    The vector is constructed with ``fieldlen`` elements and then filled by
    a single ``memcpy`` of ``total_size`` bytes from ``base_offset``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    total_size = get_variable("total_size", depth=depth, prefix=prefix)

    allocate = ExprStmt(child=Assign(
        target=Var(name=x),
        value=FuncCall(name=Raw(code=t.cpp), args=[Raw(code=fieldlen)])))
    fill = ExprStmt(child=FuncCall(
        name=Var(name="memcpy"),
        args=[Raw(code="&" + x + "[0]"),
              Raw(code=base_offset),
              Raw(code=total_size)]))
    return Block(nodes=[allocate, fill])
811 
def vector_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<non-primitive> types.

    The vector is constructed with ``fieldlen`` elements; a loop then reads
    each child and assigns it to ``x[k]``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    element = x + "[" + k + "]"

    child_prefix = get_prefix(prefix, t, 0)
    child_var = get_variable("x", depth=depth+1, prefix=child_prefix)
    child_size = get_variable("total_size", depth=depth+1,
                              prefix=child_prefix)
    # Child k starts at base + child_size * k.
    child_offset = base_offset + "+" + child_size + "*" + k

    allocate = ExprStmt(child=Assign(
        target=Raw(code=x),
        value=FuncCall(name=Raw(code=t.cpp), args=[Raw(code=fieldlen)])))

    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
                 prefix=child_prefix, base_offset=child_offset),
        ExprStmt(child=Assign(target=Var(name=element),
                              value=Raw(code=child_var))),
    ]
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=loop_body)
    return Block(nodes=[allocate, loop])
842 
def vec_string_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<std::string> types.

    The vector is constructed with ``fieldlen`` elements; a loop then reads
    each string straight into ``x[k]`` using string_body.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    element = x + "[" + k + "]"

    string_prefix = get_prefix(prefix, t, 0)
    child_size = get_variable("total_size", depth=depth+1,
                              prefix=string_prefix)
    # String k starts at base + child_size * k.
    child_offset = base_offset + "+" + child_size + "*" + k

    allocate = ExprStmt(child=Assign(
        target=Raw(code=x),
        value=FuncCall(name=Raw(code=t.cpp), args=[Raw(code=fieldlen)])))

    read_element = string_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
                               prefix=string_prefix,
                               base_offset=child_offset,
                               variable=element)
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=[read_element])
    return Block(nodes=[allocate, loop])
869 
def set_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<primitive> types.

    ``base_offset`` is cast to a pointer to the element type and the set is
    constructed from the pointer range ``[xraw, xraw+fieldlen)``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    child_prefix = get_prefix(prefix, t, 0)
    xraw = get_variable("xraw", depth=depth+1, prefix=child_prefix)
    pointer_type = CANON_TO_NODE[t.canon[1]].cpp + "*"

    cast = FuncCall(name=Raw(code="reinterpret_cast"),
                    targs=[Raw(code=pointer_type)],
                    args=[Raw(code=base_offset)])
    declare_raw = ExprStmt(child=DeclAssign(type=Type(cpp=pointer_type),
                                            target=Var(name=xraw),
                                            value=cast))

    range_ctor = FuncCall(name=Raw(code=t.cpp),
                          args=[Raw(code=xraw),
                                Raw(code=xraw + "+" + fieldlen)])
    build = ExprStmt(child=Assign(target=Var(name=x), value=range_ctor))
    return Block(nodes=[declare_raw, build])
895 
def set_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<non-primitive> types.

    A loop over ``fieldlen`` items reads each child and adds it with
    ``x.insert``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)

    child_prefix = get_prefix(prefix, t, 0)
    child_var = get_variable("x", depth=depth+1, prefix=child_prefix)
    item_size = get_variable("total_size", depth=depth+1,
                             prefix=child_prefix)
    # Child k starts at base + item_size * k.
    child_offset = base_offset + "+" + item_size + "*" + k

    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
                 prefix=child_prefix, base_offset=child_offset),
        ExprStmt(child=FuncCall(name=Raw(code=x+".insert"),
                                args=[Raw(code=child_var)])),
    ]
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=loop_body)
    return Block(nodes=[loop])
922 
def set_string_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<std::string> types.

    A loop over ``fieldlen`` items reads each string with string_body and
    adds it with ``x.insert``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)

    string_prefix = get_prefix(prefix, t, 0)
    string_size = get_variable("total_size", depth=depth+1,
                               prefix=string_prefix)
    string_name = get_variable("x", depth=depth+1, prefix=string_prefix)
    # String k starts at base + string_size * k.
    offset = base_offset + "+" + string_size + "*" + k

    loop_body = [
        string_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
                    prefix=string_prefix, base_offset=offset),
        ExprStmt(child=FuncCall(name=Raw(code=x+".insert"),
                                args=[Raw(code=string_name)])),
    ]
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=loop_body)
    return Block(nodes=[loop])
949 
def list_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ list<primitive> types.

    ``base_offset`` is cast to a pointer to the element type and the list
    is constructed from the pointer range ``[xraw, xraw+fieldlen)``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    child_prefix = get_prefix(prefix, t, 0)
    xraw = get_variable("xraw", depth=depth+1, prefix=child_prefix)
    pointer_type = CANON_TO_NODE[t.canon[1]].cpp + "*"

    cast = FuncCall(name=Raw(code="reinterpret_cast"),
                    targs=[Raw(code=pointer_type)],
                    args=[Raw(code=base_offset)])
    declare_raw = ExprStmt(child=DeclAssign(type=Type(cpp=pointer_type),
                                            target=Var(name=xraw),
                                            value=cast))

    range_ctor = FuncCall(name=Raw(code=t.cpp),
                          args=[Raw(code=xraw),
                                Raw(code=xraw + "+" + fieldlen)])
    build = ExprStmt(child=Assign(target=Var(name=x), value=range_ctor))
    return Block(nodes=[declare_raw, build])
974 
def list_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ list<non-primitive> types.

    A loop over ``fieldlen`` items reads each child and appends it with
    ``x.push_back``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    k = get_variable("k", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)

    child_prefix = get_prefix(prefix, t, 0)
    child_variable = get_variable("x", depth=depth+1, prefix=child_prefix)
    item_size = get_variable("total_size", depth=depth+1,
                             prefix=child_prefix)
    # Child k starts at base + item_size * k.
    offset = base_offset + "+" + item_size + "*" + k

    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
                 prefix=child_prefix, base_offset=offset),
        ExprStmt(child=FuncCall(name=Raw(code=x+".push_back"),
                                args=[Raw(code=child_variable)])),
    ]
    loop = For(adecl=DeclAssign(type=Type(cpp="unsigned int"),
                                target=Var(name=k),
                                value=Raw(code="0")),
               cond=BinOp(x=Var(name=k), op="<", y=Var(name=fieldlen)),
               incr=LeftUnaryOp(op="++", name=Var(name=k)),
               body=loop_body)
    return Block(nodes=[loop])
997 
# Body generators used by get_body.  Keys are either a database type name
# (e.g. "VECTOR_INT") or the first element of a canonical form (e.g.
# "VECTOR"); get_body tries the database type name first.
BODIES = {
    # primitives
    "INT": reinterpret_cast_body,
    "DOUBLE": reinterpret_cast_body,
    "FLOAT": reinterpret_cast_body,
    "BOOL": reinterpret_cast_body,
    "UUID": uuid_body,
    "STRING": string_body,
    "VL_STRING": vl_body,
    "BLOB": vl_body,
    # maps and pairs
    "MAP": map_body,
    "PAIR": pair_body,
    # lists
    "LIST_INT": list_primitive_body,
    "LIST_DOUBLE": list_primitive_body,
    "LIST_FLOAT": list_primitive_body,
    "LIST": list_body,
    # sets
    "SET_INT": set_primitive_body,
    "SET_DOUBLE": set_primitive_body,
    "SET_FLOAT": set_primitive_body,
    "SET": set_body,
    # vectors
    "VECTOR_STRING": vec_string_body,
    "VECTOR_INT": vector_primitive_body,
    "VECTOR_DOUBLE": vector_primitive_body,
    "VECTOR_FLOAT": vector_primitive_body,
    "VECTOR": vector_body,
}
1021 
def get_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Get body nodes for a C++ type.

    The result starts with the declaration of the ``x`` variable for this
    depth and prefix, followed by the nodes that fill it in.

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Current prefix, determined by parent type.
    base_offset : str, optional
        C++ expression for the position the value is read from.

    Returns
    -------
    Node
        Body nodes required for the type.

    Raises
    ------
    ValueError
        If no generator is registered for a non-primitive type.
    """
    declaration = get_decl(t, depth=depth, prefix=prefix)

    # Pick the generator: primitives by database type name, then VL types,
    # then an exact database type match, then the container kind.
    if is_primitive(t):
        make_body = BODIES[t.db]
    elif DB_TO_VL[t.db]:
        make_body = vl_body
    elif t.db in BODIES:
        make_body = BODIES[t.db]
    elif t.canon[0] in BODIES:
        make_body = BODIES[t.canon[0]]
    else:
        raise ValueError("No generation specified for type " + t.db)

    filled = make_body(t, depth=depth, prefix=prefix,
                       base_offset=base_offset)
    return Block(nodes=[declaration, filled])
1060 
# teardown functions

# Names of C++ variables that still need an H5Tclose call; the teardown
# code in this section pops them off one by one.
TEARDOWN_STACK = list()
# Not referenced anywhere in this part of the file.
VARS = list()
1065 
def normal_close(t):
    """Represents the generic close to an hdf5 type code block.

    Emits the C++ that evaluates ``CmpConds<t.cpp>`` on the depth-0 value
    variable, stores the result in ``is_row_selected`` and, when selected,
    copies the value into ``row[j]``.  Every name currently held in
    ``TEARDOWN_STACK`` is then popped (most recently pushed first) and
    turned into an ``H5Tclose`` statement, leaving the stack empty.

    Parameters
    ----------
    t : Type
        Type whose ``cpp`` name is used as the ``CmpConds`` template argument.

    Returns
    -------
    Block
        The comparison, the conditional assignment and any close calls.
    """
    value_var = get_variable("x", depth=0, prefix="")

    compare = FuncCall(name=Var(name="CmpConds"),
                       targs=[Raw(code=t.cpp)],
                       args=[Raw(code="&" + value_var),
                             Raw(code="&(field_conds[qr.fields[j]])")])
    select_row = ExprStmt(child=Assign(target=Var(name="is_row_selected"),
                                       value=compare))
    store_row = If(cond=Var(name="is_row_selected"),
                   body=[ExprStmt(child=Assign(target=Var(name="row[j]"),
                                               value=Var(name=value_var)))])
    tree = Block(nodes=[select_row, store_row])

    # Drain the stack so the next type starts with nothing left to close.
    while TEARDOWN_STACK:
        handle = TEARDOWN_STACK.pop()
        tree.nodes.append(ExprStmt(child=FuncCall(name=Var(name="H5Tclose"),
                                                  args=[Raw(code=handle)])))
    return tree
1086 
def get_teardown(t):
    """HDF5 Query: Return the teardown nodes for type ``t``.

    Every type currently shares the generic close built by ``normal_close``.
    """
    teardown = normal_close(t)
    return teardown
1089 
def indent(text, prefix, predicate=None):
    """Prefix selected lines of ``text`` (port of textwrap.indent, 3.3).

    Parameters
    ----------
    text : str
        Text to process; line endings are preserved.
    prefix : str
        String placed in front of every selected line.
    predicate : callable, optional
        Called with each line (including its line ending); the line is
        prefixed when the result is truthy.  By default a line is selected
        when it contains at least one non-whitespace character.

    Returns
    -------
    str
        The text with ``prefix`` added to the selected lines.
    """
    pieces = []
    for line in text.splitlines(True):
        if predicate is None:
            selected = line.strip()
        else:
            selected = predicate(line)
        pieces.append(prefix + line if selected else line)
    return ''.join(pieces)
1107 
def typeid(t):
    """Build the C++ ``typeid(<cpp type>)`` call for canonical type ``t``.

    ``t`` is a key of ``CANON_TO_NODE``; the ``cpp`` attribute of the node
    found there becomes the single argument of the call.
    """
    cpp_name = CANON_TO_NODE[t].cpp
    return FuncCall(name=Raw(code="typeid"), args=[Raw(code=cpp_name)])
1111 
def no_vl(t):
    """Return True when neither ``t`` nor any of its children is VL.

    A type counts as variable length when ``DB_TO_VL`` marks its db name,
    except that BLOB is never counted.  Containers are checked recursively
    through every child in ``t.canon[1:]``.
    """
    if t.db != "BLOB" and DB_TO_VL[t.db]:
        return False
    if is_primitive(t):
        return True
    # Resolve every child first, then stop evaluating at the first VL one.
    children = [CANON_TO_NODE[child] for child in t.canon[1:]]
    return all(no_vl(child) for child in children)
1123 
def get_dim_shape(canon, start=0, depth=0):
    """Number the entries of a canonical type in depth-first order.

    Every string in ``canon`` (container names included) receives the next
    consecutive index, beginning at ``start``.  The indices are returned in
    a nested list with the same nesting as ``canon``.

    Parameters
    ----------
    canon : str or sequence
        Canonical type: a primitive name or a nested sequence of names.
    start : int, optional
        Index assigned to the first string encountered.
    depth : int, optional
        Recursion depth; a bare string at depth 0 is wrapped in a list.

    Returns
    -------
    count : int
        How many indices were assigned.
    shape : list or int
        Nested list of indices, or a single int for a string at depth > 0.
    """
    if isinstance(canon, str):
        return 1, (start if depth != 0 else [start])

    consumed = 0
    shape = []
    for part in canon:
        used, sub_shape = get_dim_shape(part, start=start + consumed,
                                        depth=depth + 1)
        consumed += used
        shape.append(sub_shape)
    return consumed, shape
1140 
def flatten(canon):
    """List a canonical type and all of its sub-types in pre-order.

    The first entry is ``canon`` itself; each nested child appears as a
    whole and is immediately followed by its own children.  Container name
    strings (position 0 of every sequence) are not listed separately.

    Parameters
    ----------
    canon : str or sequence
        Canonical type.

    Returns
    -------
    list or tuple
        ``[canon]`` for a primitive string, otherwise a tuple of sub-types.
    """
    if isinstance(canon, str):
        return [canon]

    flat = list(canon)
    # Slot 0 held the container name; it stands for the whole type instead.
    flat[0] = canon
    pos = 1
    while pos < len(flat):
        entry = flat[pos]
        pos += 1
        if not isinstance(entry, str):
            # Splice the children in right behind their parent so that they
            # are visited (and expanded) next.
            flat[pos:pos] = entry[1:]
    return tuple(flat)
1156 
def get_variation_cond(t):
    """HDF5 Create: Generate C++ if-statement condition for a given type.

    Each sub-type of ``t`` that is, or could be, variable length contributes
    one test on its entry of the C++ shape array: ``shape[n]<1`` when the
    sub-type is variable length, ``shape[n]>=1`` when it is a fixed-length
    form of a type that also has a variable-length form.  The tests are
    joined left to right with ``&&``.  When every contributing sub-type is
    variable length, ``shape.empty() ||`` is put in front of the result.

    Parameters
    ----------
    t : Type
        C++ type for the boolean condition.

    Returns
    -------
    current_bool : BinOp
        Node representing the boolean condition.

    Raises
    ------
    IndexError
        If no sub-type of ``t`` contributes a test.
    """
    shape_len = get_dim_shape(t.canon)[0]
    vl_count = 0
    vl_potential_count = 0
    op_list = []

    def shape_test(index, op):
        # One comparison of a shape entry against 1.
        return BinOp(x=Raw(code="shape["+str(index)+"]"),
                     op=op, y=Raw(code="1"))

    for sub_type, index in zip(flatten(t.canon), range(shape_len)):
        node = CANON_TO_NODE[sub_type]
        if DB_TO_VL[node.db]:
            # This type is VL.
            vl_count += 1
            vl_potential_count += 1
            op_list.append(shape_test(index, "<"))
            continue

        # Not VL here; find out if the type could be VL.
        orig_type = ORIGIN_DICT[sub_type]
        if is_primitive(CANON_TO_NODE[orig_type]):
            could_be_vl = VARIATION_DICT[orig_type]
        else:
            could_be_vl = orig_type[0] in variable_length_types
        if could_be_vl:
            vl_potential_count += 1
            op_list.append(shape_test(index, ">="))

    current_bool = op_list[0]
    for extra in op_list[1:]:
        current_bool = BinOp(x=current_bool, op="&&", y=extra)

    if vl_count == vl_potential_count:
        current_bool = BinOp(x=Raw(code="shape.empty()"), op="||",
                             y=current_bool)
    return current_bool
1215 
def VL_ADD_BLOCK(t, item_var):
    """HDF5 Create: Register the VL datatype for ``t`` if it is missing.

    Generates ``if (vldts_.count(T) == 0) { ... }`` whose body creates the
    type with ``H5Tvlen_create(item_var)``, stores it in ``vldts_[T]`` and
    inserts it into ``opened_types_``; ``T`` is ``t.db``.

    Parameters
    ----------
    t : Type
        Type whose db name keys the ``vldts_`` entry.
    item_var : str
        C++ variable naming the item type passed to ``H5Tvlen_create``.

    Returns
    -------
    If
        The guarded registration.
    """
    slot = "vldts_["+t.db+"]"
    not_registered = BinOp(x=FuncCall(name=Raw(code="vldts_.count"),
                                      args=[Raw(code=t.db)]),
                           op="==",
                           y=Raw(code="0"))
    create = ExprStmt(child=BinOp(x=Raw(code=slot),
                                  op="=",
                                  y=Raw(code="H5Tvlen_create("+item_var+")")))
    remember = ExprStmt(child=FuncCall(name=Raw(code="opened_types_.insert"),
                                       args=[Raw(code=slot)]))
    return If(cond=not_registered, body=[create, remember])
1229 
def print_statement(t, identifier):
    """Generate C++ print statement for debugging generated code.

    The statement writes ``<t.db>: got here: <identifier>`` to ``std::cerr``.
    """
    message = t.db + ": got here: " + str(identifier)
    code = "std::cerr<<\"" + message + "\" << std::endl"
    return ExprStmt(child=Raw(code=code))
1235 
def get_variation_body(t):
    """HDF5 Create: Generate C++ if-statement body for a given type.

    Called in coordination with get_variation_cond.  For a given C++ type
    this returns the C++ statements that set ``dbtypes[i]``, build the HDF5
    item type, and set ``dst_sizes[i]`` and ``field_types[i]``.  Types listed
    in ``RAW_TYPES`` return their hand-written body unchanged.

    Parameters
    ----------
    t : Type
        C++ type for which to create an if-statement body.

    Returns
    -------
    body : Block or Raw
        Node containing necessary C++ statements for HDF5 creation.
    """
    # Types with non-standard bodies are handled directly.
    if t.db in RAW_TYPES:
        return RAW_TYPES[t.db]

    def set_slot(slot, value):
        # ``<slot> = <value>;`` as an expression statement.
        return ExprStmt(child=BinOp(x=Raw(code=slot), op="=", y=value))

    def keep_open(handle):
        # ``opened_types_.insert(<handle>);``
        return ExprStmt(child=FuncCall(name=Raw(code="opened_types_.insert"),
                                       args=[Raw(code=handle)]))

    body = Block(nodes=[])
    body.nodes.append(ExprStmt(child=Raw(code="dbtypes[i]="+ t.db)))

    item_nodes, opened_types = get_item_type(t)
    body.nodes.append(item_nodes)
    if opened_types != []:
        type_var = opened_types[-1]
    else:
        type_var = get_variable("item_type", prefix="", depth=0)

    is_vl = bool(DB_TO_VL[t.db])
    size_expression = Raw(code=get_item_size(t, vl_flag=is_vl))
    body.nodes.append(set_slot("dst_sizes[i]", size_expression))

    if is_vl:
        body.nodes.append(set_slot("field_types[i]", Raw(code="sha1_type_")))
        return body

    body.nodes.append(set_slot("field_types[i]", Raw(code=type_var)))
    # Everything opened on the way except the final type itself ...
    for opened in opened_types[:-1]:
        body.nodes.append(keep_open(opened))
    # ... which is tracked through field_types[i] for non-primitives.
    if not is_primitive(t):
        body.nodes.append(keep_open("field_types[i]"))
    return body
1290 
# C++ expression yielding the HDF5 type of each primitive db type.  The
# STRING entry is a format template whose {size} field is filled in by
# get_item_type().
HDF5_PRIMITIVES = {
    "INT": "H5T_NATIVE_INT",
    "DOUBLE": "H5T_NATIVE_DOUBLE",
    "FLOAT": "H5T_NATIVE_FLOAT",
    "BOOL": "H5T_NATIVE_CHAR",
    "STRING": "CreateFLStrType({size})",
    "BLOB": "sha1_type_",
    "UUID": "uuid_type_",
}

# C++ size expression for each primitive whose size does not depend on the
# shape array.
PRIMITIVE_SIZES = {
    "INT": "sizeof(int)",
    "DOUBLE": "sizeof(double)",
    "FLOAT": "sizeof(float)",
    "BOOL": "sizeof(char)",
    "VL_STRING": "CYCLUS_SHA1_SIZE",
    "BLOB": "CYCLUS_SHA1_SIZE",
    "UUID": "CYCLUS_UUID_SIZE",
}

# Variable-length container name -> its fixed-length counterpart.
VL_TO_FL_CONTAINERS = {
    "VL_VECTOR": "VECTOR",
    "VL_SET": "SET",
    "VL_LIST": "LIST",
    "VL_MAP": "MAP",
}
1311 
def get_item_type(t, shape_array=None, vl_flag=False, prefix="", depth=0):
    """HDF5 Create: Build specified HDF5 type, recursively if necessary.

    HDF5 types are Primitive, Compound, or Array, and each case is handled
    differently.  Primitives are resolved through the HDF5_PRIMITIVES
    dictionary.  Compound types are made of several child types, so every
    child is declared and created (recursively) before the parent is
    created; their sizes come from get_item_size.  Arrays hold one child
    type and are created from that child, a rank and a length.

    Parameters
    ----------
    t : Type
        Type node representing C++ type
    shape_array : list, optional
        Dimensioned list of current type shape.  Computed from ``t.canon``
        with get_dim_shape when omitted (the initial call).
    vl_flag : bool, optional
        True when an enclosing type is variable length.  It makes STRING
        children, and containers not listed in NOT_VL, resolve to
        ``sha1_type_``.
    prefix : str, optional
        Used to name C++ variables throughout multiple levels of recursion
    depth : int, optional
        Recursive depth counter

    Returns
    -------
    node : Block
        Cumulative collection of nodes necessary for specified item type
    opened_stack : list
        Cumulative collection of opened HDF5 types which must eventually be
        closed
    """
    # We need to keep a persistent shape array, unless initial call.
    if shape_array is None:
        dim_shape = get_dim_shape(t.canon)[1]
    else:
        dim_shape = shape_array
    node = Block(nodes=[])
    opened_stack = []
    type_var = get_variable("item_type", prefix=prefix, depth=depth)
    node.nodes.append(ExprStmt(child=Decl(type=Type(cpp="hid_t"),
                                          name=Var(name=type_var))))

    # Handle primitives
    if isinstance(t.canon, str):
        if DB_TO_VL[t.db] or (t.canon == "STRING" and vl_flag):
            primitive_type = Raw(code="sha1_type_")
        else:
            size = "shape[" + str(dim_shape[0]) + "]"
            primitive_type = Raw(code=HDF5_PRIMITIVES[t.db].format(size=size))
        node.nodes.append(ExprStmt(child=Assign(target=Var(name=type_var),
                                                value=primitive_type)))
        return node, opened_stack

    # Handle dependent types
    container_type = t.canon[0]
    canon_shape = list(zip(t.canon, dim_shape))
    is_vl = vl_flag

    if DB_TO_VL[t.db]:
        container_type = VL_TO_FL_CONTAINERS[t.canon[0]]
        is_vl = True
    elif t.canon[0] in variable_length_types:
        # Fixed-length container: its length becomes the array dimension.
        # shape_var is only read again in the array branch at the bottom,
        # which is guarded by the same two conditions.
        shape_var = get_variable("shape0", prefix="", depth=depth+1)
        node.nodes.append(ExprStmt(child=DeclAssign(
            type=Type(cpp="hsize_t"),
            target=Var(name=shape_var),
            value=Raw(code="shape[" + str(dim_shape[0]) + "]"))))

    item_var = ""
    if len(canon_shape[1:]) == 1:
        # Not a compound type.
        item_canon, item_shape = canon_shape[1]
        # Get nodes initializing our child type
        child_array = (item_shape if isinstance(item_shape, list)
                       else [item_shape])
        new_prefix = template_args[container_type][0]
        child_node, child_opened = get_item_type(CANON_TO_NODE[item_canon],
                                                 shape_array=child_array,
                                                 vl_flag=is_vl,
                                                 prefix=new_prefix,
                                                 depth=depth+1)
        pre_opened_len = len(opened_stack)
        node.nodes.append(child_node)
        opened_stack.extend(child_opened)
        if pre_opened_len < len(opened_stack):
            # The child opened something; its last handle is the item type.
            item_var = opened_stack[-1]
        else:
            item_var = get_variable("item_type", prefix=new_prefix,
                                    depth=depth+1)
    else:
        # This is a compound type.
        child_dict = OrderedDict()
        # 1. Get all child item type nodes, recursively.
        for i in range(1, len(canon_shape)):
            item_canon, item_shape = canon_shape[i]
            item_node = CANON_TO_NODE[item_canon]
            pre_opened_len = len(opened_stack)
            child_array = (item_shape if isinstance(item_shape, list)
                           else [item_shape])
            new_prefix = template_args[container_type][i-1]
            child_node, child_opened = get_item_type(item_node,
                                                     shape_array=child_array,
                                                     vl_flag=is_vl,
                                                     prefix=new_prefix,
                                                     depth=depth+1)
            node.nodes.append(child_node)
            opened_stack.extend(child_opened)

            # The child's own item_type variable is used when the child is
            # (or is forced to be) VL, or when it opened nothing, which
            # means it is a primitive.  Otherwise the newest entry of the
            # opened stack is the child's type.
            if ((DB_TO_VL[t.db] and item_canon[0] in variable_length_types)
                    or DB_TO_VL[item_node.db]
                    or len(opened_stack) == pre_opened_len):
                child_item_var = get_variable("item_type",
                                              prefix=new_prefix,
                                              depth=depth+1)
            else:
                child_item_var = opened_stack[-1]
            # 2. Get item sizes.
            child_dict[child_item_var] = get_item_size(item_node,
                                                       child_array,
                                                       vl_flag=is_vl,
                                                       depth=depth+1)
        # 3. Create compound type using total item size.
        compound = hdf5_create_compound(list(child_dict.values()))

        item_var = get_variable("item_type", prefix=prefix+'compound',
                                depth=depth+1)
        node.nodes.append(ExprStmt(child=Decl(type=Type(cpp="hid_t"),
                                              name=Raw(code=item_var))))
        node.nodes.append(ExprStmt(child=Assign(target=Raw(code=item_var),
                                                value=compound)))
        opened_stack.append(item_var)
        # 4. Insert individual children into the compound type.
        node.nodes.append(hdf5_insert(container_type, item_var, child_dict))

    if is_vl and t.canon not in NOT_VL:
        node.nodes.append(ExprStmt(child=Assign(target=Raw(code=type_var),
                                                value=Raw(code='sha1_type_'))))
        if vl_flag:
            # Forced VL by a parent: register under the VL form of the
            # origin type rather than under t itself.
            node.nodes.append(VL_ADD_BLOCK(ORIGIN_TO_VL[ORIGIN_DICT[t.canon]],
                                           item_var))
        else:
            node.nodes.append(VL_ADD_BLOCK(t, item_var))
        opened_stack.append(type_var)
    elif container_type in variable_length_types and not DB_TO_VL[t.db]:
        array_node = ExprStmt(child=Assign(target=Var(name=type_var),
                                           value=hdf5_array_create(
                                               item_var,
                                               rank=1,
                                               dims="&"+shape_var)))
        opened_stack.append(type_var)
        node.nodes.append(array_node)

    return node, opened_stack
1487 
def get_item_size(t, shape_array=None, vl_flag=False, depth=0):
    """Resolves item size recursively.

    We can dig down into a type until we reach eventual primitives, and then
    multiply the known sizes of those primitives by the lengths of their
    containers. Container length is defined in the C++ shape array.

    Parameters
    ----------
    t : Type
        The type whose size is in question
    shape_array : list, optional
        Dimensioned list of shape array indices, same shape as t.canon.
        Computed with get_dim_shape when omitted.
    vl_flag : bool, optional
        True when an enclosing type is variable length; primitives without
        a fixed size and containers not listed in NOT_VL then take
        ``CYCLUS_SHA1_SIZE``.
    depth : int, optional
        Recursive depth counter

    Returns
    -------
    size : str
        String of C++ expression representing t's size.
    """
    if shape_array is None:
        shape_array = get_dim_shape(t.canon)[1]

    if is_primitive(t):
        if t.db in PRIMITIVE_SIZES:
            return PRIMITIVE_SIZES[t.db]
        # No fixed size (e.g. STRING): take the length from the shape array
        # unless a VL parent stores it by hash.
        if not vl_flag:
            return "shape[" + str(shape_array[0]) + "]"
        return "CYCLUS_SHA1_SIZE"

    def child_size(child_index):
        # Size expression of the child at the given position of t.canon.
        child_array = shape_array[child_index]
        if not isinstance(child_array, list):
            child_array = [child_array]
        return get_item_size(CANON_TO_NODE[t.canon[child_index]],
                             child_array,
                             vl_flag=vl_flag,
                             depth=depth+1)

    size = "("
    if DB_TO_VL[t.db] or (vl_flag and t.canon not in NOT_VL):
        size += "CYCLUS_SHA1_SIZE"
    else:
        size += "("
        if len(t.canon[1:]) > 1:
            size += "+".join([child_size(child_index)
                              for child_index in range(1, len(t.canon))])
        else:
            size += child_size(1)
        size += ")"
        # Fixed-length containers hold shape[n] items of that size.
        if t.canon[0] in variable_length_types:
            size += "*" + "shape[" + str(shape_array[0]) + "]"
    size += ")"
    return size
1548 
def hdf5_array_create(item_variable, rank=1, dims="&shape0"):
    """Node representation of the C++ H5Tarray_create2 method.

    Parameters
    ----------
    item_variable : str
        Variable name of HDF5 array item.
    rank : int, optional
        Number of HDF5 array dimensions.
    dims : str, optional
        Variable (by reference) of shape array belonging to HDF5 array

    Returns
    -------
    node : FuncCall
        Node of H5Tarray_create2 function call.
    """
    arguments = [Raw(code=item_variable),
                 Raw(code=str(rank)),
                 Raw(code=dims)]
    return FuncCall(name=Var(name="H5Tarray_create2"), args=arguments)
1570 
def hdf5_create_compound(sizes):
    """Node representation of the C++ HDF5 compound type creation function.

    Parameters
    ----------
    sizes : list
        List of type sizes, all must be str type.  They are joined with
        ``+`` to form the total size argument.

    Returns
    -------
    node : FuncCall
        H5Tcreate function call node.
    """
    total_size = "+".join(sizes)
    return FuncCall(name=Var(name="H5Tcreate"),
                    args=[Raw(code="H5T_COMPOUND"), Raw(code=total_size)])
1587 
def hdf5_insert(container_type, compound_var, types_sizes_dict):
    """Node representation of the C++ H5Tinsert function.

    This function is used to identify partitions within an already
    established HDF5 Compound type.  One ``H5Tinsert`` call is generated per
    entry of ``types_sizes_dict``, in iteration order; each member is placed
    at the sum of the sizes of the members before it.

    Parameters
    ----------
    container_type : str
        Should be a key in the template_args dict
    compound_var : str
        C++ variable to which the function should refer
    types_sizes_dict : dict
        Dictionary of C++ type variables mapped to their size in memory

    Returns
    -------
    node : Block
        Cumulative nodes for H5Tinsert function
    """
    node = Block(nodes=[])
    offset = str(0)
    for position, (type_var, type_size) in enumerate(types_sizes_dict.items()):
        # The member name is the quoted template argument name.
        descriptor = "\"" + template_args[container_type][position] + "\""
        call = FuncCall(name=Var(name="H5Tinsert"),
                        args=[Raw(code=compound_var),
                              Raw(code=descriptor),
                              Raw(code=offset),
                              Raw(code=type_var)])
        node.nodes.append(ExprStmt(child=call))
        # The next member starts right after this one.
        offset += "+" + type_size
    return node
1624 
def main_query():
    """HDF5 Query: Generate Query case statement code.

    For every canonical type the setup, body and teardown nodes are wrapped
    in a case statement by ``case_template`` and rendered to C++.

    Returns
    -------
    str
        The rendered case statements, indented by five INDENT units.
    """
    CPPGEN = CppGen(debug=False)
    cases = []
    for canon in CANON_TYPES:
        type_node = CANON_TO_NODE[canon]
        # Order matters: teardown consumes state recorded while the setup
        # and body were generated.
        setup = get_setup(type_node)
        body = get_body(type_node)
        teardown = get_teardown(type_node)
        read_x = Block(nodes=[setup, body, teardown])
        cases.append(CPPGEN.visit(case_template(type_node, read_x)))
    return indent("".join(cases), INDENT * 5)
1638 
# Final else-branch of the generated CreateTable dispatch: the column's C++
# type has no HDF5 mapping.
io_error = Raw(code="".join([
    "throw IOError(\"the type for column '\"+",
    "std::string(field_names[i])+\"' is not yet supported ",
    "in HDF5.\");",
]))

# Hand-written CreateTable body for fixed-length strings.
raw_string = Raw(code="".join([
    "dbtypes[i]=STRING;\n",
    "field_types[i]=H5Tcopy(H5T_C_S1);\n",
    "H5Tset_size(field_types[i], shape[0]);\n",
    "H5Tset_strpad(field_types[i], H5T_STR_NULLPAD);\n",
    "opened_types_.insert(field_types[i]);\n",
    "dst_sizes[i]=sizeof(char)*shape[0];\n",
]))

# Hand-written CreateTable body for blobs.
raw_blob = Raw(code="".join([
    "dbtypes[i]=BLOB;\n",
    "field_types[i]=sha1_type_;\n",
    "dst_sizes[i]=CYCLUS_SHA1_SIZE;\n",
]))

# db type name -> body that bypasses the generic generation.
RAW_TYPES = {
    "STRING": raw_string,
    "BLOB": raw_blob,
}

DEBUG_TYPES = ["VECTOR_STRING"]
1658 
def main_create():
    """HDF5 Create: Generate CreateTable if-statements.

    The result is one outer if/else-if chain on ``valtype`` with a branch
    per key of VARIATION_DICT.  Each branch first sets ``shape`` and then
    either selects among the type's variations by their shape condition or,
    when there are none, emits the body of the type directly.

    Returns
    -------
    str
        The rendered C++ chain, indented by one INDENT unit.
    """
    CPPGEN = CppGen(debug=False)

    # Create every entry up front so the ordering follows VARIATION_DICT.
    outer_if_bodies = OrderedDict(
        (canon, Block(nodes=[])) for canon in VARIATION_DICT.keys())

    for canon in VARIATION_DICT.keys():
        variations = VARIATION_DICT[canon][:]
        key_node = CANON_TO_NODE[canon]
        # NOTE(review): besides the pop() on an empty list, this handler
        # also catches an IndexError raised inside the helper calls below;
        # confirm whether that is relied upon before narrowing it.
        try:
            initial_type = variations.pop()
            first_cond = get_variation_cond(initial_type)
            first_body = [get_variation_body(initial_type)]
            alternatives = [(get_variation_cond(v), [get_variation_body(v)])
                            for v in variations]
            fallback = Block(nodes=[get_variation_body(key_node)])
            outer_if_bodies[canon].nodes.append(
                If(cond=first_cond, body=first_body, elifs=alternatives,
                   el=fallback))
        except IndexError:
            outer_if_bodies[canon].nodes.append(get_variation_body(key_node))

    shape_line = ExprStmt(child=Raw(code="shape=shapes[i]"))

    def is_valtype(canon):
        # ``valtype == typeid(<cpp type>)``
        return BinOp(x=Var(name="valtype"), op="==", y=typeid(canon))

    # The last entry opens the chain; the remaining ones become else-ifs.
    initial_node, initial_body = outer_if_bodies.popitem()
    first_test = is_valtype(initial_node)
    other_tests = [(is_valtype(canon), [shape_line, outer_if_bodies[canon]])
                   for canon in outer_if_bodies.keys()]
    if_statement = If(cond=first_test,
                      body=[shape_line, initial_body],
                      elifs=other_tests,
                      el=io_error)
    return indent(CPPGEN.visit(if_statement), INDENT)
1698 
def camel_case(db):
    """Turn an underscore-separated name into capitalized, joined words.

    Each ``_``-separated part is passed through ``str.capitalize`` (first
    character upper case, the rest lower case) and the parts are joined
    without a separator, e.g. ``VL_VECTOR_INT`` -> ``VlVectorInt``.
    """
    return "".join(word.capitalize() for word in db.split("_"))
1704 
def string(s):
    """Wrap ``s`` in double quotes; nothing inside ``s`` is escaped."""
    quote = '"'
    return quote + s + quote
1707 
def main_vl_dataset():
    """HDF5 VL_DATASET: Generate the VLDataset function code.

    One case statement is generated for every variable-length variation of
    every origin type; an origin without VL variations is used itself when
    it is variable length and skipped otherwise.  Each case assigns the
    camel-cased db name of the origin type to ``name``.

    Returns
    -------
    str
        The rendered case statements, indented by two INDENT units.
    """
    CPPGEN = CppGen()
    output = ""
    for origin in list(VARIATION_DICT.keys()):
        vals = [v.canon for v in VARIATION_DICT[origin] if DB_TO_VL[v.db]]
        origin_node = CANON_TO_NODE[origin]
        if not vals:
            if not DB_TO_VL[origin_node.db]:
                continue
            vals.append(origin)
        dataset_name = string(camel_case(origin_node.db))
        for canon in vals:
            # A fresh node per case, so no node object is shared between
            # rendered statements.
            case_body = ExprStmt(child=Assign(target=Var(name="name"),
                                              value=Raw(code=dataset_name)))
            output += CPPGEN.visit(case_template(CANON_TO_NODE[canon],
                                                 case_body))

    output = indent(output, INDENT*2)
    return output
1731 
def main_fill_buf():
    """HDF5 FILL_BUF: Generates the FillBuf function code.

    Every canonical type gets a case statement that calls
    ``WriteToBuf<DBTYPE>(buf+offset, shapes[col], a, sizes[col])``.

    Returns
    -------
    str
        The rendered case statements, indented by four INDENT units.
    """
    CPPGEN = CppGen()
    output = ""
    for canon in CANON_TYPES:
        type_node = CANON_TO_NODE[canon]
        call_args = [Raw(code="buf+offset"),
                     Raw(code="shapes[col]"),
                     Raw(code="a"),
                     Raw(code="sizes[col]")]
        write_to_buf = FuncCall(name=Var(name="WriteToBuf"),
                                targs=[Raw(code=type_node.db)],
                                args=call_args)
        output += CPPGEN.visit(case_template(type_node,
                                             ExprStmt(child=write_to_buf)))
    return indent(output, INDENT*4)
1747 
# C++ template for writing a VL_STRING value: hash it and, when the hash is
# not yet a known key, append the key and store the value as is.
vl_write_vl_string = (
    "hasher_.Clear();\n"
    "hasher_.Update({var});\n"
    "Digest {key} = hasher_.digest();\n"
    "hid_t {keysds} = VLDataset({t.db}, true);\n"
    "hid_t {valsds} = VLDataset({t.db}, false);\n"
    "if (vlkeys_[{t.db}].count({key}) != 1) {{\n"
    " AppendVLKey({keysds}, {t.db}, {key});\n"
    " InsertVLVal({valsds}, {t.db}, {key}, {var});\n"
    "}}\n"
)

# Same as above for BLOB, except that the stored value is ({var}).str().
vl_write_blob = (
    "hasher_.Clear();\n"
    "hasher_.Update({var});\n"
    "Digest {key} = hasher_.digest();\n"
    "hid_t {keysds} = VLDataset({t.db}, true);\n"
    "hid_t {valsds} = VLDataset({t.db}, false);\n"
    "if (vlkeys_[{t.db}].count({key}) != 1) {{\n"
    " AppendVLKey({keysds}, {t.db}, {key});\n"
    " InsertVLVal({valsds}, {t.db}, {key}, ({var}).str());\n"
    "}}\n"
)

# db type name -> template that replaces the generic one in vl_write().
VL_SPECIAL_TYPES = {
    "VL_STRING": vl_write_vl_string,
    "BLOB": vl_write_blob,
}
1770 
def vl_write(t, variable, depth=0, prefix="", pointer=False):
    """HDF5 Write: Return code previously found in VLWrite.

    Generates the C++ that hashes a variable-length value and, if its hash
    is not yet a known key of the type's VL dataset, appends the key and
    inserts the value.  Types listed in VL_SPECIAL_TYPES use their own
    template; all others are converted with ``VLValToBuf`` first.

    Parameters
    ----------
    t : Type
        Variable-length type being written; ``t.db`` selects the dataset.
    variable : str
        C++ expression naming the value.
    depth : int, optional
        Recursive depth, used to name the helper variables.
    prefix : str, optional
        Prefix used to name the helper variables.
    pointer : bool, optional
        When True, ``variable`` is dereferenced with a leading ``*``.

    Returns
    -------
    Raw
        The formatted C++ code.
    """
    generic_template = """hasher_.Clear();
hasher_.Update({var});
Digest {key} = hasher_.digest();
hid_t {keysds} = VLDataset({t.db}, true);
hid_t {valsds} = VLDataset({t.db}, false);
if (vlkeys_[{t.db}].count({key}) != 1) {{
 hvl_t {buf} = VLValToBuf({var});
 AppendVLKey({keysds}, {t.db}, {key});
 InsertVLVal({valsds}, {t.db}, {key}, {buf});
}}\n"""
    # Helper variable names are requested in a fixed order.
    helper_names = [get_variable(kind, depth=depth, prefix=prefix)
                    for kind in ("buf", "key", "keysds", "valsds")]
    buf_variable, key_variable, keysds_variable, valsds_variable = helper_names

    if pointer:
        variable = "*" + variable

    if t.db in VL_SPECIAL_TYPES:
        node_str = VL_SPECIAL_TYPES[t.db]
    else:
        node_str = generic_template

    code = node_str.format(var=variable,
                           no_p_var=variable.strip("*"),
                           key=key_variable,
                           keysds=keysds_variable,
                           t=t,
                           valsds=valsds_variable,
                           buf=buf_variable)
    return Raw(code=code)
1799 
def memcpy(dest, src, size):
    """HDF5 Write: Node representation of memcpy function.

    All three arguments are C++ expressions given as strings; the result is
    the statement ``memcpy(dest, src, size)``.
    """
    call = FuncCall(name=Var(name="memcpy"),
                    args=[Raw(code=text) for text in (dest, src, size)])
    return ExprStmt(child=call)
1805 
def memset(dest, src, size):
    """HDF5 Write: Node representation of memset function.

    All three arguments are C++ expressions given as strings; the result is
    the statement ``memset(dest, src, size)``.  ``src`` is passed as the
    second argument of the call.
    """
    call = FuncCall(name=Var(name="memset"),
                    args=[Raw(code=text) for text in (dest, src, size)])
    return ExprStmt(child=call)
1811 
def a_cast(t, depth=0, prefix=""):
    """HDF5 Write: Node representation of boost hold_any casting.

    Primitives that are neither variable length nor listed in
    WRITE_BODY_PRIMITIVES are read as ``const void*`` through
    ``a->castsmallvoid()``; every other type is read with
    ``a->cast<T>()`` into a variable of its own C++ type.

    Parameters
    ----------
    t : Type
        Type held by ``a``.
    depth : int, optional
        Recursive depth, used to name the value variable.
    prefix : str, optional
        Prefix used to name the value variable.

    Returns
    -------
    Block
        The declaration and initialization of the value variable.
    """
    val = get_variable("val", depth=depth, prefix=prefix)
    raw_copy = (is_primitive(t) and t.db not in WRITE_BODY_PRIMITIVES
                and not DB_TO_VL[t.db])
    if raw_copy:
        declare = ExprStmt(child=Decl(type=Type(cpp="const void*"),
                                      name=Var(name=val)))
        assign = ExprStmt(child=Assign(target=Var(name=val),
                                       value=Raw(code="a->castsmallvoid()")))
        return Block(nodes=[declare, assign])

    typed = ExprStmt(child=DeclAssign(
        type=t,
        target=Var(name=val),
        value=Raw(code="a->cast<" + t.cpp + ">()")))
    return Block(nodes=[typed])
1830 
def get_write_setup(t, shape_array, depth=0, prefix=""):
    """HDF5 Write: Creates setup variables (lengths, sizes) for function body.

    This function recursively declares the sizes, lengths and other
    necessary variables for the parent and children types.  Called by
    get_write_body.

    Parameters
    ----------
    t : Type
        Type for which the variables are declared.
    shape_array : list
        Shape array indices with the same nesting as ``t.canon``.
    depth : int
        Recursive depth, used to name the variables.
    prefix : str
        Prefix used to name the variables.

    Returns
    -------
    setup : Block
        Declarations for ``t`` followed by those of its children.
    """
    def declare_size_t(name, code):
        # ``size_t <name> = <code>;``
        return ExprStmt(child=DeclAssign(type=Type(cpp="size_t"),
                                         target=Var(name=name),
                                         value=Raw(code=code)))

    setup = Block(nodes=[])
    item_size = get_variable("item_size", depth=depth, prefix=prefix)
    setup.nodes.append(declare_size_t(item_size,
                                      get_item_size(t, shape_array)))
    if t.db == "STRING":
        # Declared only; write_body_string assigns it.
        valuelen = get_variable("valuelen", depth=depth, prefix=prefix)
        setup.nodes.append(ExprStmt(child=Decl(type=Type(cpp="size_t"),
                                               name=Var(name=(valuelen)))))
    if is_primitive(t):
        return setup

    # Setup prefixes and container-level length variable.
    container = t.canon[0]
    if DB_TO_VL[t.db]:
        container = VL_TO_FL_CONTAINERS[container]
    elif t.canon[0] in variable_length_types:
        length = get_variable("length", depth=depth, prefix=prefix)
        setup.nodes.append(declare_size_t(
            length, "shape[" + str(shape_array[0]) + "]"))
    prefixes = template_args[container]

    # Add setup of any children.
    for child_canon, child_shape, child_prefix in zip(t.canon[1:],
                                                      shape_array[1:],
                                                      prefixes):
        if isinstance(child_shape, int):
            child_shape = [child_shape]
        setup.nodes.append(get_write_setup(CANON_TO_NODE[child_canon],
                                           child_shape,
                                           depth=depth+1,
                                           prefix=prefix+child_prefix))
    total_item_size = get_variable("total_item_size", depth=depth,
                                   prefix=prefix)

    # Put together total_item_size variable: the sum of the children's
    # item sizes (a single child needs no '+').
    children = [get_variable("item_size", depth=depth+1,
                             prefix=prefix+prefixes[i])
                for i in range(len(t.canon[1:]))]
    setup.nodes.append(declare_size_t(total_item_size, "+".join(children)))

    # remove elements that exceed column.
    if depth == 0 and not DB_TO_VL[t.db] and container in variable_length_types:
        value = get_variable("val", depth=depth, prefix=prefix)
        init = t.cpp + "::iterator eraseit=" + value + ".begin()"
        advance = "std::advance(eraseit, column/" + total_item_size + ")"
        erase = value + ".erase(eraseit," + value + ".end())"
        too_long = BinOp(x=Raw(code=total_item_size+"*"+value+".size()"),
                         op=">", y=Raw(code='column'))
        setup.nodes.append(If(cond=too_long,
                              body=[ExprStmt(child=Raw(code=init)),
                                    ExprStmt(child=Raw(code=advance)),
                                    ExprStmt(child=Raw(code=erase))]))

    return setup
1918 
def write_body_string(t, depth=0, prefix="", variable=None, offset="buf",
                      pointer=False):
    """HDF5 Write: Specialization of the write_body function for STRING type

    Generates three statements: the copy length is set to the smaller of
    the string's size and the item size, that many characters are copied to
    ``offset``, and the rest of the item is filled with zeros.

    Parameters
    ----------
    t : Type
        Not used by this specialization.
    depth : int, optional
        Recursive depth, used to name the variables.
    prefix : str, optional
        Prefix used to name the variables.
    variable : str, optional
        C++ expression of the string; defaults to the ``val`` variable.
    offset : str, optional
        C++ expression of the destination address.
    pointer : bool, optional
        When True, members of ``variable`` are accessed with ``->``.

    Returns
    -------
    Block
        The three statements.
    """
    if variable is None:
        variable = get_variable("val", depth=depth, prefix=prefix)
    access = "->" if pointer else "."
    valuelen = get_variable("valuelen", depth=depth, prefix=prefix)
    item_size = get_variable("item_size", depth=depth, prefix=prefix)

    clamp = ExprStmt(child=Assign(
        target=Var(name=valuelen),
        value=FuncCall(name=Raw(code="std::min"),
                       args=[Raw(code=variable + access + "size()"),
                             Raw(code=item_size)])))
    copy = memcpy(offset, variable + access + "c_str()", valuelen)
    pad = memset(offset+"+"+valuelen, "0", item_size+"-"+valuelen)
    return Block(nodes=[clamp, copy, pad])
1938 
def write_body_uuid(t, depth=0, prefix="", variable=None, offset="buf",
                    pointer=False):
    """HDF5 Write: Specialization of the write_body function for UUID type

    Generates a single ``memcpy`` of ``item_size`` bytes from the address of
    the value to ``offset``.

    Parameters
    ----------
    t : Type
        Not used by this specialization.
    depth : int, optional
        Recursive depth, used to name the variables.
    prefix : str, optional
        Prefix used to name the variables.
    variable : str, optional
        C++ expression of the value; defaults to the ``val`` variable.
    offset : str, optional
        C++ expression of the destination address.
    pointer : bool, optional
        When True, ``variable`` is dereferenced before its address is taken.

    Returns
    -------
    Block
        The copy statement.
    """
    if variable is None:
        variable = get_variable("val", depth=depth, prefix=prefix)
    size = get_variable("item_size", depth=depth, prefix=prefix)
    target = "*" + variable if pointer else variable
    return Block(nodes=[memcpy(offset, "&(" + target + ")", size)])
1951 
def write_body_primitive(t, depth=0, prefix="", variable=None, offset="buf",
                         pointer=False):
    """HDF5 Write: Specialization of the write_body function for primitives.

    Produces a single memcpy into ``offset``. At depth 0 the variable is
    passed to memcpy unchanged and the copy length is ``column``; at any
    other depth the address of the value is taken and the copy length is the
    'item_size' variable for this depth and prefix.

    Parameters
    ----------
    t : Type
        Node of the primitive type (not inspected by this function).
    depth : int, optional
        Recursive depth, used for variable naming.
    prefix : str, optional
        Prefix used for variable naming.
    variable : str or None, optional
        Name of the C++ variable; defaults to the 'val' variable for this
        depth and prefix.
    offset : str, optional
        C++ expression for the destination in the buffer.
    pointer : bool, optional
        If True (and depth > 0), the variable is dereferenced before its
        address is taken.

    Returns
    -------
    node : Block
        Block holding the memcpy node.
    """
    if variable is None:
        variable = get_variable("val", depth=depth, prefix=prefix)
    item_size = get_variable("item_size", depth=depth, prefix=prefix)
    if depth == 0:
        # Presumably the top-level 'val' is already a pointer to the data
        # (see a_cast) -- confirm; it is handed to memcpy as-is.
        source = variable
        size = "column"
    else:
        value_expr = "*" + variable if pointer else variable
        source = "&(" + value_expr + ")"
        size = item_size
    return Block(nodes=[memcpy(offset, source, size)])
1967 
# Primitive db types that need a dedicated write routine; get_write_body
# falls back to write_body_primitive for every db type not listed here.
WRITE_BODY_PRIMITIVES = {
    "STRING": write_body_string,
    "UUID": write_body_uuid,
}
1970 
# C++ statement templates that add child value(s) to a container variable.
# Filled in with str.format using the keys 'var', 'child0' and, for the
# two-child containers, 'child1'.
CONTAINER_INSERT_STRINGS = {
    "MAP": "{var}[{child0}] = {child1}",
    "LIST": "{var}.push_back({child0})",
    "SET": "{var}.insert({child0})",
    "VECTOR": "{var}.push_back({child0})",
    "PAIR": "{var} = std::make_pair({child0},{child1})",
}
1976 
def is_all_vl(t):
    """HDF5 Write: Determines if type is entirely VL.

    A type is entirely VL if the top level type is VL, as well as all children
    that have the potential to be VL. This means that VL_VECTOR_INT will return
    True here, but VL_VECTOR_STRING will return False.

    Parameters
    ----------
    t : Type

    Returns
    -------
    result : bool
        True if type is entirely VL, else False
    """
    # A primitive that is itself VL needs no further inspection.
    if is_primitive(t) and DB_TO_VL[t.db]:
        return True
    result = False
    for position, canon in enumerate(flatten(t.canon)):
        node = CANON_TO_NODE[canon]
        if DB_TO_VL[node.db]:
            result = True
        elif position == 0:
            # The top-level type is not VL: result is still False here.
            break
        elif node.canon not in NOT_VL:
            # A child that could have been VL but is not.
            result = False
            break
        # Remaining case: the child is listed in NOT_VL, so it is skipped.
    return result
2013 
def pad_children(t, variable, fixed_var=None, depth=0, prefix="", called_depth=0):
    """HDF5 Write: Pads FL children of VL parent types.

    This function is used on top-level VL container types which contain 1 or
    more FL child types (i.e. VL_VECTOR_STRING). These children should be
    padded to their max length if they do not already meet it. This is done
    recursively.

    Parameters
    ----------
    t : Type
        Node of the container type being processed.
    variable : str
        C++ expression of the source container (or pair) to read from.
    fixed_var : None or str, optional
        Name of the C++ variable that receives the padded copy. When None,
        the 'fixed_val' variable for this depth and prefix is used.
    depth : int, optional
        Recursive depth, used for variable naming.
    prefix : str, optional
        Prefix used for variable naming.
    called_depth : int, optional
        Records the origin depth to determine when we're at relative depth=0

    Returns
    -------
    result : Block
        Nodes required for padding
    """
    # For VL db types, look up the container name through VL_TO_FL_CONTAINERS
    # (presumably the matching fixed-length container name -- confirm).
    if DB_TO_VL[t.db]:
        container = VL_TO_FL_CONTAINERS[t.canon[0]]
    else:
        container = t.canon[0]
    result = Block(nodes=[])
    body_nodes = []
    keywords = {}
    # Depth 0 should have no specified fixed_var variable. If this type is a
    # child, we'll want to use the child variable that was created for it
    # by its parent.
    if fixed_var is None:
        fixed_var = get_variable("fixed_val", depth=depth, prefix=prefix)
    # The destination variable is always declared here, whoever named it.
    result.nodes.append(ExprStmt(child=Decl(type=Type(cpp=t.cpp),
                                            name=Raw(code=fixed_var))))
    iterator = get_variable("it", depth=depth, prefix=prefix)
    pad_count = get_variable("pad_count", depth=depth, prefix=prefix)
    result.nodes.append(ExprStmt(child=DeclAssign(
                                        type=Type(cpp='unsigned int'),
                                        target=Var(name=pad_count),
                                        value=Raw(code='0'))))
    prefixes = template_args[container]
    keywords['var'] = fixed_var
    num = len(t.canon[1:])
    # C++ expressions used to reach each child of the current element.
    if num == 1:
        children = ["(*" + iterator + ")"]
    else:
        if container in variable_length_types:
            members = ['->first', '->second']
            children = ["{}{}".format(a, b) for a, b in zip([iterator]*num,
                                                            members)]
        else:
            members = ['.first', '.second']
            children = ["{}{}".format(a, b) for a, b in zip([variable]*num,
                                                            members)]
    for count, child_canon in enumerate(t.canon[1:]):
        child_node = CANON_TO_NODE[child_canon]
        child_keyword = "child" + str(count)
        child_variable = get_variable("child", depth=depth+1,
                                      prefix=prefix+prefixes[count])
        if is_primitive(child_node):
            # Strings are the only primitive we are looking for
            item_size = get_variable("item_size", depth=depth+1,
                                     prefix=prefix+prefixes[count])
            if child_node.db == 'STRING':
                # std::string(src, 0, n) keeps at most n characters.
                constructor = ("std::string(" + children[count] + ",0,"
                               + item_size + ")")
                body_nodes.append(ExprStmt(child=DeclAssign(
                                                    type=child_node,
                                                    target=Var(
                                                        name=child_variable),
                                                    value=Raw(
                                                        code=constructor))))
                keywords[child_keyword] = child_variable
            # Leave other primitives alone.
            else:
                keywords[child_keyword] = children[count]
        else:
            # All VL containers
            if DB_TO_VL[child_node.db]:
                if is_all_vl(child_node):
                    # Skip child
                    keywords[child_keyword] = children[count]
                else:
                    # Recursion for VL containers may work this way
                    body_nodes.append(pad_children(child_node, children[count],
                                                   fixed_var=child_variable,
                                                   depth=depth+1,
                                                   prefix=prefix
                                                          +prefixes[count],
                                                   called_depth=called_depth))
                    keywords[child_keyword] = child_variable
            # FL variable length containers
            elif child_node.canon[0] in variable_length_types:
                child_length = get_variable("length", depth=depth+1,
                                            prefix=prefix+prefixes[count])
                child_pad_count = get_variable("pad_count", depth=depth+1,
                                               prefix=prefix+prefixes[count])
                body_nodes.append(pad_children(child_node, children[count],
                                               fixed_var=child_variable,
                                               depth=depth+1,
                                               prefix=prefix+prefixes[count],
                                               called_depth=called_depth))
                # attempt to resize container
                if child_node.canon[0] == 'VECTOR':
                    body_nodes.append(ExprStmt(child=Raw(code=child_variable+".resize("+child_length+")")))
                    # NOTE(review): item_size is only bound in the primitive
                    # branch above, so here it holds the value from an
                    # earlier child (or is unbound if no primitive child came
                    # first). Left unchanged because it affects the generated
                    # C++ -- confirm the intended size variable.
                    size = "(" + child_length + "-" + child_pad_count + ")" + "*" + item_size
                    body_nodes.append(memset("&"+child_variable, str(0), size))
                keywords[child_keyword] = child_variable
            # PAIRS, etc.
            else:
                # Recursive call on the PAIR, using the parent iterator as the
                # new origin variable. We specify that the fixed variable should
                # simply be the 'child' variable we created in this loop.
                body_nodes.append(pad_children(child_node, children[count],
                                               fixed_var=child_variable,
                                               depth=depth+1,
                                               prefix=prefix+prefixes[count],
                                               called_depth=called_depth))
                keywords[child_keyword] = child_variable

    # Statement that stores the (padded) children into the destination.
    assignment = CONTAINER_INSERT_STRINGS[container].format(**keywords)
    body_nodes.append(ExprStmt(child=Raw(code=assignment)))
    if container in variable_length_types:
        body_nodes.append(ExprStmt(child=Raw(code="++" + pad_count)))
        # The iterator is only declared here for levels below the one the
        # padding started at.
        if depth > called_depth:
            result.nodes.append(ExprStmt(child=DeclAssign(
                                                type=Type(cpp=t.cpp
                                                          +"::iterator"),
                                                target=Raw(code=iterator),
                                                value=Raw(code=variable
                                                          +".begin()"))))
        result.nodes.append(For(cond=BinOp(x=Var(name=iterator), op="!=",
                                           y=Var(name=variable+".end()")),
                                incr=Raw(code="++" + iterator),
                                body=body_nodes))
    else:
        # Non-iterable containers (e.g. pairs) need no loop.
        result.nodes.extend(body_nodes)
    return result
2157 
def get_write_body(t, shape_array, depth=0, prefix="", variable="a",
                   offset="buf", pointer=False):
    """HDF5 Write: Generates the body of the WriteToBuf function definition.

    Three cases are handled: types that are entirely VL (written through
    vl_write, with the resulting key copied to the buffer), primitives
    (dispatched to the write_body_* helpers), and containers, whose children
    are processed by recursive calls to this function.

    Parameters
    ----------
    t : Type
        Node representing the desired C++ type
    shape_array : list
        Dimensioned list of shape array indices corresponding to types in
        t.canon
    depth : int, optional
        Recursive depth
    prefix : str, optional
        Used for recursive variable naming convention
    variable : str, optional
        Name of the type's C++ variable
    offset : str, optional
        Location of current memory offset
    pointer : bool, optional
        Denotes if current variable is a pointer, and whether member access
        should be performed via arrow or dot notation

    Returns
    -------
    result : Block
        Nodes required for body of the function definition
    """
    result = Block(nodes=[])
    # Determine if type is entirely variable length
    all_vl = is_all_vl(t)
    # Declare and assign the 'val' variable
    if depth == 0:
        # At the top level the caller-supplied variable name is replaced by
        # the generated 'val' name, and the cast and setup nodes are emitted.
        variable = get_variable("val", depth=depth, prefix=prefix)
        result.nodes.append(a_cast(t))
        result.nodes.append(get_write_setup(t, shape_array))

    # If entirely variable length, we can simply use the VLWrite definition
    if all_vl:
        result.nodes.append(vl_write(t, variable, depth=depth, prefix=prefix,
                                     pointer=pointer))
        key = get_variable("key", depth=depth, prefix=prefix)
        # Only the key's value (CYCLUS_SHA1_SIZE bytes) goes into the buffer.
        result.nodes.append(memcpy(offset, key + ".val", "CYCLUS_SHA1_SIZE"))
        return result
    # Handle primitive bodies
    if is_primitive(t):
        if t.db in WRITE_BODY_PRIMITIVES:
            result.nodes.append(WRITE_BODY_PRIMITIVES[t.db](t, depth=depth,
                                                            prefix=prefix,
                                                            variable=variable,
                                                            offset=offset,
                                                            pointer=pointer))
        else:
            result.nodes.append(write_body_primitive(t, depth=depth,
                                                     prefix=prefix,
                                                     variable=variable,
                                                     offset=offset,
                                                     pointer=pointer))
        return result
    # Handle potentially variable length bodies
    else:
        # Declare count and iterator variables for the loop.
        count = get_variable("count", depth=depth, prefix=prefix)
        result.nodes.append(ExprStmt(child=DeclAssign(
                                            type=Type(cpp="unsigned int"),
                                            target=Var(name=count),
                                            value=Raw(code="0"))))
        iterator = get_variable("it", depth=depth, prefix=prefix)
        total_size = get_variable("total_item_size", depth=depth, prefix=prefix)
        # Recursively gather child bodies
        child_bodies = []
        container = t.canon[0]
        # Handle variable length container
        if DB_TO_VL[t.db]:
            container = VL_TO_FL_CONTAINERS[container]
            prefixes = template_args[container]
            result.nodes.append(ExprStmt(child=DeclAssign(
                                            type=Type(cpp=t.cpp+"::iterator"),
                                            target=Raw(code=iterator),
                                            value=Raw(code=variable
                                                      +".begin()"))))
            # Pad the FL children into the 'fixed_val' copy, then write that
            # copy through vl_write and store its key in the buffer.
            result.nodes.append(pad_children(t, variable, depth=depth,
                                             prefix=prefix, called_depth=depth))
            fixed_val = get_variable("fixed_val", depth=depth, prefix=prefix)
            result.nodes.append(vl_write(t, fixed_val, depth=depth,
                                         prefix=prefix))
            key = get_variable("key", depth=depth, prefix=prefix)
            result.nodes.append(memcpy(offset, key + ".val", "CYCLUS_SHA1_SIZE"))
        # Handle fixed length containers
        else:
            # Iterable containers are walked with an iterator; other
            # containers (e.g. pairs) are accessed through the variable itself.
            new_variable = variable
            if container in variable_length_types:
                new_variable = iterator
            prefixes = template_args[container]
            if len(t.canon[1:]) == 1:
                child_node = CANON_TO_NODE[t.canon[1]]
                child_size = get_variable("item_size", depth=depth+1,
                                          prefix=prefix+prefixes[0])
                # Single-child containers: element i lands at
                # offset + child_size * count, accessed through the iterator.
                child_bodies.append(get_write_body(child_node, shape_array[1],
                                                   depth=depth+1,
                                                   prefix=prefix+prefixes[0],
                                                   variable=new_variable,
                                                   offset=offset+"+"+child_size
                                                          +"*"+count,
                                                   pointer=True))
            else:
                # Two-child containers: partial_size accumulates the sizes of
                # the children already written within one element.
                partial_size = "0"
                if container in variable_length_types:
                    labels = ['->first', '->second']
                elif pointer:
                    labels = ['->first', '->second']
                else:
                    labels = ['.first', '.second']
                for c, s, p, l in zip(t.canon[1:], shape_array[1:], prefixes,
                                      labels):
                    child_node = CANON_TO_NODE[c]
                    item_label = new_variable+l
                    child_size = get_variable("item_size", depth=depth+1,
                                              prefix=prefix+p)
                    child_bodies.append(get_write_body(child_node, s,
                                                       depth=depth+1,
                                                       prefix=prefix+p,
                                                       variable=item_label,
                                                       offset=offset+"+("+count
                                                              +"*"+total_size
                                                              +")+"
                                                              +partial_size,
                                                       pointer=False))
                    partial_size += "+" + child_size
            if container in variable_length_types:
                # NOTE(review): this assignment to labels is not read again
                # within this function.
                labels = ['->first', '->second']
                # For loop uses child bodies
                result.nodes.append(ExprStmt(child=DeclAssign(
                                                    type=Type(cpp=t.cpp
                                                              +"::iterator"),
                                                    target=Raw(code=new_variable),
                                                    value=Raw(code=variable
                                                              +".begin()"))))
                child_bodies.append(ExprStmt(child=LeftUnaryOp(op="++",
                                                               name=Var(
                                                                   name=count))))
                result.nodes.append(For(cond=BinOp(x=Var(name=new_variable),
                                                   op="!=",
                                                   y=Var(name=variable
                                                         +".end()")),
                                        incr=Raw(code="++" + new_variable),
                                        body=child_bodies))
                # Add memset statement outside of loop
                container_length = get_variable("length", depth=depth,
                                                prefix=prefix)
                # Zero the slots between the written count and the length.
                dest = offset + "+" + total_size + "*" + count
                length = (total_size + "*" + "(" + container_length + "-"
                          + count + ")")
                if depth == 0:
                    # At the top level the memset is guarded by a check
                    # against the column size.
                    result.nodes.append(If(cond=BinOp(
                                                x=Raw(code=total_size+"*"
                                                      +container_length),
                                                op="<", y=Raw(code="column")),
                                           body=[memset(dest, str(0), length)]))
                else:
                    result.nodes.append(memset(dest, str(0), length))
            else:
                # No loop needed: emit the child bodies directly.
                result.nodes.extend(child_bodies)
        return result
2322 
def main_write():
    """HDF5 Write: Generate the WriteToBuf templated function definitions.

    One ``Hdf5Back::WriteToBuf`` template specialization is generated for
    every canonical type in CANON_TYPES.

    Returns
    -------
    output : str
        Concatenated C++ source of all generated definitions.
    """
    CPPGEN = CppGen(debug=False)
    pieces = []
    for i in CANON_TYPES:
        t = CANON_TO_NODE[i]
        node = FuncDef(type=Raw(code="void"),
                       name=Var(name="Hdf5Back::WriteToBuf"),
                       targs=[Raw(code=t.db)],
                       args=[Decl(type=Type(cpp="char*"), name=Var(name="buf")),
                             Decl(type=Type(cpp="std::vector<int>&"),
                                  name=Var(name="shape")),
                             Decl(type=Type(
                                      cpp="const boost::spirit::hold_any*"),
                                  name=Var(name="a")),
                             Decl(type=Type(cpp="size_t"),
                                  name=Var(name="column"))],
                       body=[get_write_body(t, get_dim_shape(t.canon)[1])],
                       tspecial=True)
        # Each definition is rendered in its own block.
        pieces.append(CPPGEN.visit(Block(nodes=[node])))
    return "".join(pieces)
2346 
def to_from_buf_setup(t, depth=0, prefix="", spec=None):
    """HDF5 VAL_TO_BUF and BUF_TO_VAL: Generate setup for both functions.

    This setup is to be called one time for each type. It returns nodes for
    initial buffer/type declaration, item sizes, and a potentially dimensioned
    list describing which child types within the initial container t are VL.
    These are denoted by a 1, where fixed-length primitive types are denoted by
    a 0. Fixed-length containers (i.e. pairs) are denoted by a nested list of
    1's and 0's.

    Parameters
    ----------
    t : Type
        C++ type node.
    depth : int, optional
        Recursive depth counter, used for variable names.
    prefix : str, optional
        Current prefix, used for variable name uniqueness.
    spec : str or None, optional
        Determines whether extra nodes are added for VAL_TO_BUF ('TO_BUF') or
        BUF_TO_VAL ('TO_VAL'). Only consulted at depth 0.

    Returns
    -------
    node : Block
        All setup nodes.
    vl_list : list
        Potentially dimensioned list corresponding to child types, with values
        of 0 and 1 representing FL and VL types, respectively.
    """
    node = Block(nodes=[])
    top_level = depth == 0
    # Handle specializations for VLValToBuf and VLBufToVal functions.
    if top_level and spec == 'TO_BUF':
        # hvl_t buf; buf.len = x.size();
        node.nodes.append(ExprStmt(child=Decl(type=Type(cpp="hvl_t"),
                                              name=Var(name="buf"))))
        node.nodes.append(ExprStmt(child=Assign(target=Var(name="buf.len"),
                                                value=Raw(code="x.size()"))))
    elif top_level and spec == 'TO_VAL':
        # Declare the result value and a char* view 'p' of buf.p.
        val = get_variable('x', depth=depth, prefix=prefix)
        node.nodes.append(ExprStmt(child=Decl(type=t, name=Var(name=val))))
        char_view = reinterpret_cast(Type(cpp='char'), 'buf.p')
        node.nodes.append(ExprStmt(child=DeclAssign(type=Type(cpp='char*'),
                                                    target=Var(name='p'),
                                                    value=char_view)))

    child_sizes = OrderedDict()
    container = t.canon[0]
    vl_list = []
    # Iterate, determine sizes and whether type is VL
    for child, part in zip(t.canon[1:], template_args[container]):
        child_node = CANON_TO_NODE[child]
        child_prefix = prefix + part
        size_var = get_variable("item_size", depth=depth+1,
                                prefix=child_prefix)
        if is_primitive(child_node):
            # BLOBs and primitives with a recorded variation are stored by
            # their SHA1 key; other primitives use their fixed size.
            if child_node.db == "BLOB" or VARIATION_DICT[child]:
                child_sizes[size_var] = "CYCLUS_SHA1_SIZE"
                vl_list.append(1)
            else:
                child_sizes[size_var] = PRIMITIVE_SIZES[child_node.db]
                vl_list.append(0)
        elif child_node.canon[0] in variable_length_types:
            child_sizes[size_var] = "CYCLUS_SHA1_SIZE"
            vl_list.append(1)
        else:
            # Remaining containers: recurse and record their nested flags.
            child_sizes[size_var] = get_variable("total_item_size", depth+1,
                                                 prefix=child_prefix)
            nested_node, nested_flags = to_from_buf_setup(child_node,
                                                          depth=depth+1,
                                                          prefix=child_prefix)
            node.nodes.append(nested_node)
            vl_list.append(nested_flags)

    # Unpack and declare all child sizes.
    for size_name, size_value in child_sizes.items():
        node.nodes.append(ExprStmt(child=DeclAssign(
                                        type=Type(cpp="size_t"),
                                        target=Raw(code=size_name),
                                        value=Raw(code=size_value))))
    # The total item size is the sum of the declared child sizes.
    total_var = get_variable("total_item_size", depth=depth, prefix=prefix)
    total_expr = "+".join(child_sizes.keys())
    node.nodes.append(ExprStmt(child=DeclAssign(type=Type(cpp="size_t"),
                                                target=Raw(code=total_var),
                                                value=Raw(code=total_expr))))
    # Further specializations.
    if top_level and spec == 'TO_BUF':
        # size_t nbytes = total * buf.len; buf.p = new char[nbytes];
        node.nodes.append(ExprStmt(child=DeclAssign(
                                        type=Type(cpp="size_t"),
                                        target=Var(name="nbytes"),
                                        value=Raw(code=total_var+"*buf.len"))))
        node.nodes.append(ExprStmt(child=Assign(
                                        target=Var(name="buf.p"),
                                        value=Raw(code="new char[nbytes]"))))
    return node, vl_list
2447 
def to_buf_body(t, vl_list, depth=0, prefix="", variable=None,
                offset="reinterpret_cast<char*>(buf.p)"):
    """HDF5 VAL_TO_BUF: Generates the body of the VLValToBuf function.

    The VLValToBuf function creates a new VL buffer from an original C++ data
    type. All potentially variable length types are passed to VLWrite and a
    SHA1 hash is added to the buffer in place of the actual type data.
    Primitives and remaining container types are written as-is to the buffer.

    Parameters
    ----------
    t : Type
        Node representing current C++ type
    vl_list : list
        Potentially dimensioned list of 1's and 0's, corresponding to each
        child type and whether it is variable length or not, respectively.
    depth : int, optional
        Current recursive depth, used for naming variables.
    prefix : str, optional
        Current variable prefix, used to ensure unique variable names.
    variable : str or None, optional
        Current container variable name; defaults to 'x'.
    offset : str, optional
        Current offset into data.

    Returns
    -------
    block : Block
        Nodes representing the body.
    """
    if variable is None:
        variable = 'x'
    block = Block(nodes=[])
    total_size_var = get_variable("total_item_size", depth=depth, prefix=prefix)
    prefixes = template_args[t.canon[0]]
    children = t.canon[1:]
    loop_block = Block(nodes=[])
    new_offset = offset
    # If a container is VL and has multiple children, we'll need to use pointer
    # notation to access the child data. Otherwise, we can use normal dot
    # notation.
    if t.canon[0] in variable_length_types:
        count_var = get_variable("count", depth=depth, prefix=prefix)
        block.nodes.append(ExprStmt(child=DeclAssign(type=Type(
                                                        cpp="unsigned int"),
                                                     target=Var(name=count_var),
                                                     value=Raw(code="0"))))
        iter_var = get_variable("it", depth=depth, prefix=prefix)
        block.nodes.append(ExprStmt(child=DeclAssign(
                                            type=Type(cpp=t.cpp
                                                      +"::const_iterator"),
                                            target=Var(name=iter_var),
                                            value=Raw(code=variable
                                                      +".begin()"))))
        new_variable = iter_var
        labels = ['->first', '->second']
        # Each element starts total_item_size * count bytes into the buffer.
        new_offset += "+(" + total_size_var + "*" + count_var + ")"
    else:
        labels = ['.first', '.second']
        new_variable = variable
    # Containers with only one child can simply use the iterator to reference
    # their child data.
    if len(children) == 1:
        new_variable = "(*" + new_variable + ")"
        labels = ['']
    for child, part, vl, label in zip(children, prefixes, vl_list, labels):
        child_node = CANON_TO_NODE[child]
        child_var = new_variable + label
        item_size = get_variable("item_size", depth=depth+1, prefix=prefix+part)
        # For variable length types we must use VLWrite to get the SHA1
        if vl == 1:
            loop_block.nodes.append(vl_write(ORIGIN_TO_VL[child], child_var,
                                             depth=depth+1, prefix=prefix+part))
            key_var = get_variable("key", depth=depth+1, prefix=prefix+part)
            loop_block.nodes.append(memcpy(new_offset, key_var+".val",
                                           item_size))
        # Other primitives can be copied
        elif vl == 0:
            loop_block.nodes.append(memcpy(new_offset, "&("+child_var+")",
                                           item_size))
        # Other containers must be handled recursively.
        # (vl is a nested list here, so it equals neither 1 nor 0.)
        else:
            loop_block.nodes.append(to_buf_body(child_node, vl, depth=depth+1,
                                                prefix=prefix+part,
                                                variable=child_var,
                                                offset=new_offset))
        # Update current offset.
        new_offset += "+" + item_size

    # For variable length containers, add the for loop.
    if t.canon[0] in variable_length_types:
        block.nodes.append(For(cond=BinOp(x=Var(name=iter_var), op="!=",
                                          y=Var(name=variable+".end()")),
                               incr=Raw(code="++"+iter_var),
                               body=[loop_block,
                                     ExprStmt(child=LeftUnaryOp(
                                                        op="++",
                                                        name=Var(
                                                            name=count_var)))]))
    # Other containers don't need a loop (i.e. pair).
    else:
        block.nodes.append(loop_block)
    # Only the outermost body returns the filled buffer.
    if depth == 0:
        block.nodes.append(ExprStmt(child=Raw(code="return buf")))
    return block
2552 
def main_val_to_buf():
    """HDF5 VAL_TO_BUF: Generates VLValToBuf function.

    A ``Hdf5Back::VLValToBuf`` definition is generated for every key of
    VARIATION_DICT that has at least one variation and whose outermost
    container is listed in variable_length_types.

    Returns
    -------
    output : str
        C++ source of all generated definitions.
    """
    CPPGEN = CppGen(debug=False)
    block = Block(nodes=[])
    for i in VARIATION_DICT:
        if not VARIATION_DICT[i]:
            continue
        t = CANON_TO_NODE[i]
        if t.canon[0] not in variable_length_types:
            continue
        # Named setup_nodes so the module-level setup() is not shadowed.
        setup_nodes, vl_list = to_from_buf_setup(t, spec='TO_BUF')
        node = FuncDef(type=Type(cpp="hvl_t"),
                       name=Var(name="Hdf5Back::VLValToBuf"),
                       args=[Decl(type=Type(cpp="const "+t.cpp+"&"),
                                  name=Var(name="x"))],
                       body=[setup_nodes, to_buf_body(t, vl_list)])
        block.nodes.append(node)
    return CPPGEN.visit(block)
2571 
def main_val_to_buf_h():
    """HDF5 VAL_TO_BUF_H: Generates header declarations for VLValToBuf function.

    A ``hvl_t VLValToBuf(const T& x)`` declaration is generated for every key
    of VARIATION_DICT that has at least one variation and whose outermost
    container is listed in variable_length_types.

    Returns
    -------
    output : str
        The generated declarations, indented by INDENT.
    """
    CPPGEN = CppGen()
    block = Block(nodes=[])
    for i in VARIATION_DICT:
        if not VARIATION_DICT[i]:
            continue
        node = CANON_TO_NODE[i]
        if node.canon[0] not in variable_length_types:
            continue
        decl = ExprStmt(child=Decl(type=Type(cpp="hvl_t"),
                                   name=FuncCall(
                                       name=Var(name="VLValToBuf"),
                                       args=[Decl(type=Type(
                                                      cpp="const "
                                                          +node.cpp
                                                          +"&"),
                                                  name=Raw(code="x"))])))
        block.nodes.append(decl)
    return indent(CPPGEN.visit(block), INDENT)
2593 
def vl_read(t, offset):
    """Representation of C++ VLRead function.

    The call is templated on the C++ type of ``t`` and on the db name that
    ORIGIN_TO_VL records for ``t.canon``.

    Parameters
    ----------
    t : Type
        C++ type node.
    offset : str
        Memory location of SHA1 hash.

    Returns
    -------
    node : FuncCall
        The final function call.
    """
    template_arguments = [Raw(code=t.cpp),
                          Raw(code=ORIGIN_TO_VL[t.canon].db)]
    return FuncCall(name=Var(name='VLRead'),
                    targs=template_arguments,
                    args=[Raw(code=offset)])
2614 
def reinterpret_cast(t, offset, deref=False):
    """Representation of C++ reinterpret_cast function.

    The cast target is always a pointer to the C++ type of ``t``.

    Parameters
    ----------
    t : Type
        C++ type to cast as.
    offset : str
        Memory location of the data to cast.
    deref : bool, optional
        Should the function be dereferenced? (This returns the newly casted
        data, rather than a pointer)

    Returns
    -------
    node : FuncCall
        The final function call.
    """
    # Dereferencing is expressed by prefixing the call name with '*'.
    func_name = '*reinterpret_cast' if deref else 'reinterpret_cast'
    pointer_type = Raw(code=t.cpp+'*')
    return FuncCall(name=Var(name=func_name),
                    targs=[pointer_type],
                    args=[Raw(code=offset)])
2640 
def to_val_body(t, vl_list, depth=0, prefix='', variable='x0', offset=None):
    """Generates the body of the VLBufToVal function.

    The VLBufToVal function is responsible for reading the bytes of a VL buffer
    back into a C++ value. Importantly, we assume that all types which have the
    capability of being VL *are* VL. When we encounter one of these types, we
    call VLRead, passing in the respective SHA1 hash value. Otherwise, we read
    in the fixed length number of bytes associated with the type.

    Parameters
    ----------
    t : Type
        Node representing current C++ type
    vl_list : list
        Potentially dimensioned list of 1's and 0's, corresponding to each
        child type and whether it is variable length or not, respectively.
    depth : int, optional
        Current recursive depth, used for naming variables.
    prefix : str, optional
        Current variable prefix, used to ensure unique variable names.
    variable : str, optional
        Current container variable name.
    offset : str or None, optional
        Current offset into buffer. When None, the offset of the current
        element relative to 'p' is used.

    Returns
    -------
    block : Block
        Nodes representing the body.
    """
    block = Block(nodes=[])
    # argument dict, for unpacking later in str formatting
    args = {'var': variable}
    total_item_size = get_variable('total_item_size', depth=depth,
                                   prefix=prefix)
    count = get_variable('count', depth=depth, prefix=prefix)
    # set default offset if none given
    if offset is None:
        offset = 'p+' + "(" + total_item_size + '*' + count + ")"
    container = t.canon[0]
    loop_block = Block(nodes=[])
    child_specs = zip(t.canon[1:], template_args[container], vl_list)
    for child_count, (child, part, vl) in enumerate(child_specs):
        type_node = CANON_TO_NODE[child]
        child_var = get_variable('x', depth=depth+1, prefix=prefix+part)
        child_size = get_variable('item_size', depth=depth+1,
                                  prefix=prefix+part)
        child_arg = 'child' + str(child_count)
        # any variable length type can be resolved using VLRead, even containers
        if vl == 1:
            loop_block.nodes.append(ExprStmt(child=DeclAssign(
                                                type=type_node,
                                                target=Var(name=child_var),
                                                value=vl_read(type_node,
                                                              offset))))
        # read in primitive, fixed-length types
        elif vl == 0:
            loop_block.nodes.append(ExprStmt(child=DeclAssign(
                                                type=type_node,
                                                target=Var(name=child_var),
                                                value=reinterpret_cast(
                                                            type_node,
                                                            offset,
                                                            deref=True))))
        # structures which are neither primitive nor VL, i.e. std::pairs, must
        # be handled recursively
        # (vl is a nested list here, so it equals neither 1 nor 0.)
        else:
            loop_block.nodes.append(ExprStmt(child=Decl(
                                                type=type_node,
                                                name=Var(name=child_var))))
            loop_block.nodes.append(to_val_body(type_node, vl, depth=depth+1,
                                                prefix=prefix+part,
                                                offset=offset,
                                                variable=child_var))
        args[child_arg] = child_var
        # The next child starts right after this one.
        offset += "+" + child_size
    # This is the expression which adds a type to a given container, formatted
    # with the variables associated with child types. This must be placed in a
    # for loop if the container is variable length.
    container_expr = CONTAINER_INSERT_STRINGS[container].format(**args)
    if container in variable_length_types:
        block.nodes.append(ExprStmt(child=DeclAssign(type=Type(
                                                        cpp='unsigned int'),
                                                     target=Var(name=count),
                                                     value=Raw(code='0'))))
        block.nodes.append(For(cond=BinOp(x=Var(name=count), op='<',
                                          y=Var(name='buf.len')),
                               incr=Raw(code='++'+count),
                               body=[loop_block,
                                     ExprStmt(child=Raw(code=container_expr))]))
    else:
        block.nodes.append(loop_block)
        block.nodes.append(ExprStmt(child=Raw(code=container_expr)))
    # Only the outermost body returns the rebuilt value.
    if depth == 0:
        block.nodes.append(ExprStmt(child=Raw(code='return ' + variable)))
    return block
2739 
def main_buf_to_val():
    """HDF5 BUF_TO_VAL: Generates the VLBufToVal function code.

    A ``Hdf5Back::VLBufToVal`` template specialization is generated for every
    key of VARIATION_DICT that has at least one variation and whose outermost
    container is listed in variable_length_types.

    Returns
    -------
    output : str
        C++ source of all generated definitions.
    """
    CPPGEN = CppGen()
    block = Block(nodes=[])
    for i in VARIATION_DICT:
        if not VARIATION_DICT[i]:
            continue
        t = CANON_TO_NODE[i]
        if t.canon[0] not in variable_length_types:
            continue
        # Named setup_nodes so the module-level setup() is not shadowed.
        setup_nodes, vl_list = to_from_buf_setup(t, spec='TO_VAL')
        node = FuncDef(type=Type(cpp=t.cpp),
                       name=Var(name="Hdf5Back::VLBufToVal"),
                       targs=[Raw(code=t.cpp)],
                       args=[Decl(type=Type(cpp="const hvl_t&"),
                                  name=Var(name="buf"))],
                       body=[setup_nodes, to_val_body(t, vl_list)],
                       tspecial=True)
        block.nodes.append(node)
    return CPPGEN.visit(block)
2760 
def setup():
    """Populate the module-level lookup tables from share/dbtypes.json.

    Reads the type table, keeps the rows of the highest HDF5 version found,
    and fills CANON_TYPES, DB_TO_CPP, CANON_TO_DB, CANON_TO_NODE, DB_TO_VL,
    VARIATION_DICT, ORIGIN_DICT, NOT_VL and ORIGIN_TO_VL. Finally binds
    MAIN_DISPATCH to the main_XXXX generation functions.
    """
    global NOT_VL
    global MAIN_DISPATCH

    # The table is located relative to this file: ../share/dbtypes.json
    with open(os.path.join(os.path.dirname(__file__), '..', 'share',
                           'dbtypes.json')) as f:
        raw_table = resolve_unicode(json.load(f))

    # Find the greatest HDF5 version string (column 5) and the first and
    # last row index carrying it.
    version = ""
    table_start = 0
    table_end = 0
    for row in range(len(raw_table)):
        current = tuple(raw_table[row])
        if current[4] == "HDF5":
            if current[5] > version:
                version = current[5]
                table_start = row
            if current[5] == version:
                table_end = row

    types_table = list(tuple(row) for row in raw_table[table_start:table_end+1])

    # Columns used below: 1 = db name, 2 = C++ type, 6 = flag that must
    # equal 1, 7 = canonical form, 8 = value stored in DB_TO_VL.
    # NOTE(review): column 6 presumably marks supported types and column 8
    # the VL flag -- confirm against dbtypes.json.
    for row in types_table:
        if row[6] == 1 and row[4] == "HDF5" and row[5] == version:
            db = row[1]
            cpp = row[2]
            canon = convert_canonical(row[7])
            if canon not in CANON_TYPES:
                CANON_TYPES.append(canon)
            DB_TO_CPP[db] = cpp
            CANON_TO_DB[canon] = db
            CANON_TO_NODE[canon] = Type(cpp=cpp, db=db, canon=canon)
            DB_TO_VL[db] = row[8]

    # Canonical types for which no_vl() holds, in CANON_TYPES order.
    fixed_length_types = []
    for n in CANON_TYPES:
        if no_vl(CANON_TO_NODE[n]) and n not in fixed_length_types:
            fixed_length_types.append(n)

    # For each such type, its variations are the other nodes that share
    # the same C++ type but have a different db name.
    for n in fixed_length_types:
        key = CANON_TO_NODE[n]
        vals = []
        for x in CANON_TYPES:
            val_node = CANON_TO_NODE[x]
            if val_node.cpp == key.cpp and val_node.db != key.db:
                vals.append(val_node)
        VARIATION_DICT[n] = vals

    # BLOB and STRING entries are set explicitly.
    VARIATION_DICT['BLOB'] = []
    VARIATION_DICT['STRING'] = [CANON_TO_NODE['VL_STRING']]

    # Map every origin and each of its variations back to the origin.
    for i in VARIATION_DICT:
        ORIGIN_DICT[i] = i
        if VARIATION_DICT[i] != []:
            for j in VARIATION_DICT[i]:
                ORIGIN_DICT[j.canon] = i

    # Collect the canonical forms that is_all_vl() is allowed to skip.
    # NOTE(review): the nesting of the inner 'for j' loop under the
    # non-iterable-container check is reconstructed -- confirm against the
    # upstream file.
    for i in VARIATION_DICT.keys():
        node = CANON_TO_NODE[i]
        if DB_TO_VL[node.db]:
            continue
        if not is_primitive(node):
            if i[0] not in variable_length_types:
                NOT_VL.append(i)
                for j in VARIATION_DICT[i]:
                    NOT_VL.append(j.canon)
        if not VARIATION_DICT[i]:
            NOT_VL.append(i)
    NOT_VL = set(NOT_VL)

    # Record, per origin, the node that is entirely VL.
    for n in CANON_TYPES:
        node = CANON_TO_NODE[n]
        if is_all_vl(node):
            ORIGIN_TO_VL[ORIGIN_DICT[n]] = node

    MAIN_DISPATCH = {"QUERY": main_query,
                     "CREATE": main_create,
                     "VL_DATASET": main_vl_dataset,
                     "FILL_BUF": main_fill_buf,
                     "WRITE": main_write,
                     "VAL_TO_BUF_H": main_val_to_buf_h,
                     "VAL_TO_BUF": main_val_to_buf,
                     "BUF_TO_VAL": main_buf_to_val}
2844 
def main():
    """Command-line entry point: run the generation routine named in argv[1].

    Raises
    ------
    ValueError
        If no generation instruction was given on the command line.
    KeyError
        If the instruction is not a key of MAIN_DISPATCH.
    """
    try:
        gen_instruction = sys.argv[1]
    except IndexError:
        # Only a missing argument is translated; anything else propagates.
        raise ValueError("No generation instruction provided")

    # Setup for global util dictionaries
    setup()

    # Dispatch to requested generation function
    try:
        function = MAIN_DISPATCH[gen_instruction]
    except KeyError:
        # Same exception type as a plain lookup, with a usable message.
        raise KeyError("Unknown generation instruction {0!r}; expected one "
                       "of: {1}".format(gen_instruction,
                                        ", ".join(sorted(MAIN_DISPATCH))))
    print(function())


if __name__ == '__main__':
    main()
def get_decl(t, depth=0, prefix="")
def visit_var(self, node)
def visit_case(self, node)
def reinterpret_cast(t, offset, deref=False)
def flatten(canon)
def write_body_uuid(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
def get_write_setup(t, shape_array, depth=0, prefix="")
def visit_type(self, node)
def visit_funcdef(self, node)
def visit_funccall(self, node)
def resolve_unicode(item)
def to_buf_body(t, vl_list, depth=0, prefix="", variable=None, offset="reinterpret_cast<char*>(buf.p)")
def list_body(t, depth=0, prefix="", base_offset="buf+offset")
def a_cast(t, depth=0, prefix="")
def __init__(self, tree=None)
def vector_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit_block(self, node)
def write_body_string(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
def hdf5_array_create(item_variable, rank=1, dims="&shape0")
def visit_if(self, node)
def set_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
def hdf5_create_compound(sizes)
def set_body(t, depth=0, prefix="", base_offset="buf+offset")
def to_from_buf_setup(t, depth=0, prefix="", spec=None)
def get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j')
def case_template(t, read_x)
def vl_read(t, offset)
def vl_body(t, depth=0, prefix="", base_offset="buf+offset")
def memcpy(dest, src, size)
def visit_nothing(self, node)
def reinterpret_cast_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit_assign(self, node)
def to_val_body(t, vl_list, depth=0, prefix='', variable='x0', offset=None)
def __init__(self, tree=None, indent=' ')
def write_body_primitive(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
def get_write_body(t, shape_array, depth=0, prefix="", variable="a", offset="buf", pointer=False)
def get_dim_shape(canon, start=0, depth=0)
def vl_string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
def map_body(t, depth=0, prefix="", base_offset="buf+offset")
def VL_ADD_BLOCK(t, item_var)
def visit_leftunaryop(self, node)
def convert_canonical(raw_list)
def memset(dest, src, size)
def vec_string_body(t, depth=0, prefix="", base_offset="buf+offset")
def uuid_body(t, depth=0, prefix="", base_offset="buf+offset")
def set_string_body(t, depth=0, prefix="", base_offset="buf+offset")
def string_setup(depth=0, prefix="")
def indent(text, prefix, predicate=None)
def primitive_setup(t, depth=0, prefix="")
def visit_exprstmt(self, node)
def visit_rightunaryop(self, node)
def pair_body(t, depth=0, prefix="", base_offset="buf+offset")
def hdf5_insert(container_type, compound_var, types_sizes_dict)
def list_dependencies(canon)
def main_val_to_buf_h()
std::string capitalize(std::string s)
Returns a capitalized copy of the string.
Definition: pyne.cc:231
def vl_write(t, variable, depth=0, prefix="", pointer=False)
def visit_for(self, node)
def print_statement(t, identifier)
def get_variation_cond(t)
def get_variation_body(t)
def list_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
def get_variable(name, depth=0, prefix="")
def pad_children(t, variable, fixed_var=None, depth=0, prefix="", called_depth=0)
def get_prefix(base_prefix, parent_type, child_index)
def visit_declassign(self, node)
def visit_decl(self, node)
def get_item_size(t, shape_array=None, vl_flag=False, depth=0)
def __init__(self, kwargs)
def visit_raw(self, node)
def vl_string_setup(depth=0, prefix="")
def get_item_type(t, shape_array=None, vl_flag=False, prefix="", depth=0)
def string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
def visit_binop(self, node)
def get_body(t, depth=0, prefix="", base_offset="buf+offset")
def camel_case(db)
def __init__(self, tree=None, indent=' ', debug=False)
def visit(self, node=None)
def vector_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")