# CYCLUS: hdf5_back_gen.py
1 #!/usr/bin/env python
2 """This module generates HDF5 backend code found in src/hdf5_back.cc"""
3 import os
4 import sys
5 import json
6 from pprint import pformat
7 from itertools import chain
8 from collections import OrderedDict
9 
def is_primitive(t):
    """Return True when ``t.canon`` is a plain ``str``.

    Parameters
    ----------
    t : object
        Any object exposing a ``canon`` attribute (normally a Type node).

    Returns
    -------
    bool
        True if the canonical form is a string, False otherwise.
    """
    return isinstance(t.canon, str)
11 
class Node(object):
    """Base class for the nodes of the code-generation tree.

    Subclasses list their attribute names in ``fields``. Every field becomes
    an instance attribute: the value passed as a keyword argument, or None
    when the keyword is omitted.
    """
    fields = ()

    def __init__(self, **kwargs):
        """Store keyword arguments as attributes.

        Raises
        ------
        RuntimeError
            If a keyword is not listed in ``fields``.
        """
        for field, value in kwargs.items():
            if field not in self.fields:
                # Carry the diagnostic in the exception itself instead of
                # printing it and raising an empty RuntimeError.
                raise RuntimeError(
                    "{0} is not a valid field".format(field))
            setattr(self, field, value)
        # Fields that were not supplied default to None.
        for field in self.fields:
            if field not in kwargs:
                setattr(self, field, None)

    def __str__(self):
        """Render the node (and its children) with PrettyFormatter."""
        return PrettyFormatter(self).visit()
29 
# Concrete node kinds. Each class only declares which attributes it carries;
# Node.__init__ does the rest.

class Block(Node):
    # A sequence of child nodes rendered one after another.
    fields = ("nodes",)


class Var(Node):
    fields = ("name",)


class Type(Node):
    # C++ spelling, database type name and canonical form of a type.
    fields = ("cpp", "db", "canon")


class Decl(Node):
    fields = ("type", "name")


class Expr(Node):
    fields = ("value",)


class ExprStmt(Node):
    # An expression terminated as a statement.
    fields = ("child",)


class Line(Node):
    fields = ("child",)


class Assign(Node):
    fields = ("target", "value")


# NOTE(review): the class line was missing from the listing; the name is
# taken from the DeclAssign(type=..., target=..., value=...) calls and the
# visit_declassign method elsewhere in this file.
class DeclAssign(Node):
    fields = ("type", "target", "value")


class Case(Node):
    fields = ("cond", "body")


class If(Node):
    # elifs is iterated as (cond, body) pairs; el is the optional else node.
    fields = ("cond", "body", "elifs", "el")


class For(Node):
    fields = ("adecl", "cond", "incr", "body")


class BinOp(Node):
    fields = ("x", "op", "y")


# NOTE(review): class line missing from the listing; name taken from the
# LeftUnaryOp(op=..., name=...) calls and visit_leftunaryop.
class LeftUnaryOp(Node):
    fields = ("op", "name")


# NOTE(review): class line missing from the listing; name inferred from
# visit_rightunaryop, which reads node.name then node.op -- confirm.
class RightUnaryOp(Node):
    fields = ("name", "op")


class FuncCall(Node):
    # targs means template args
    fields = ("name", "args", "targs")


class Raw(Node):
    # for cheating and literals
    fields = ("code",)


class Nothing(Node):
    # for "nothing"
    fields = ()
86 
87 _lowername = lambda cls: cls.__name__.lower()
88 
class Visitor(object):
    """Base class for everything that walks a tree of nodes.

    Dispatch is by class name: for a node of class ``Foo`` the method
    ``visit_foo`` is looked up, then the same for each base class in MRO
    order, and the first callable match handles the node.
    """

    def __init__(self, tree=None):
        self.tree = tree

    def visit(self, node=None):
        """Walk *node*, or the stored tree when no node is given.

        Raises
        ------
        RuntimeError
            If neither a node nor a tree is available.
        AttributeError
            If no ``visit_<classname>`` method exists for the node.
        """
        target = self.tree if node is None else node
        if target is None:
            raise RuntimeError('no node or tree given!')

        for klass in type.mro(target.__class__):
            handler = getattr(self, 'visit_' + _lowername(klass), None)
            if callable(handler):
                return handler(target)

        # No handler anywhere in the MRO: build a descriptive error.
        msg = 'could not find valid visitor method for {0} on {1}'.format(
            target.__class__.__name__, self.__class__.__name__)
        try:
            # Best effort only: str(node) itself goes through a visitor and
            # may fail, in which case the shorter message is used.
            msg += "\n"
            msg += str(target)
        except Exception:
            pass
        raise AttributeError(msg)
120 
# NOTE(review): the class line was missing from the listing; the name and
# base are taken from the super(PrettyFormatter, self) call and the use of
# self.visit() below.
class PrettyFormatter(Visitor):
    """Formats a tree of nodes into a pretty string"""

    # NOTE(review): the default indent shows as a single space in the
    # listing; whitespace may have been collapsed -- confirm.
    def __init__(self, tree=None, indent=' '):
        super(PrettyFormatter, self).__init__(tree=tree)
        self.level = 0
        self.indent = indent

    def visit_node(self, node):
        """Render *node* as ``ClassName(field=value, ...)``.

        Child nodes are rendered recursively; other values use pformat.
        Field lines are indented by ``self.indent`` via the module-level
        ``indent`` function.
        """
        name = node.__class__.__name__
        if not node.fields:
            return name + '()'

        self.level += 1
        rendered = []
        for field in node.fields:
            value = getattr(node, field)
            if isinstance(value, Node):
                text = self.visit(value)
            else:
                text = pformat(value)
            rendered.append('{0}={1}'.format(field, text))
        body = indent(',\n'.join(rendered), self.indent)
        self.level -= 1
        return name + '(\n' + body + '\n)'
144 
# NOTE(review): the class line was missing from the listing; the name and
# base are taken from the super(CppGen, self) call and the use of
# self.visit() below.
class CppGen(Visitor):
    """Visitor that renders a node tree as C++ source text."""

    # NOTE(review): the default indent shows as a single space in the
    # listing; whitespace may have been collapsed -- confirm.
    def __init__(self, tree=None, indent=' '):
        super(CppGen, self).__init__(tree=tree)
        self.level = 0
        self.indent = indent

    def _indented(self, nodes):
        """Render each node in *nodes*, indent it, and concatenate."""
        return "".join(indent(self.visit(n), self.indent) for n in nodes)

    def visit_var(self, node):
        return node.name

    def visit_type(self, node):
        return node.cpp

    def visit_decl(self, node):
        # "<type> <name>"
        return self.visit(node.type) + " " + self.visit(node.name)

    def visit_exprstmt(self, node):
        # Terminate the expression as a statement.
        return self.visit(node.child) + ";\n"

    def visit_assign(self, node):
        return self.visit(node.target) + "=" + self.visit(node.value)

    def visit_declassign(self, node):
        # "<type> <target>=<value>"
        return (self.visit(node.type) + " " + self.visit(node.target)
                + "=" + self.visit(node.value))

    def visit_binop(self, node):
        # No spaces around the operator, so "." and "::" render correctly.
        return self.visit(node.x) + node.op + self.visit(node.y)

    def visit_leftunaryop(self, node):
        return node.op + self.visit(node.name)

    def visit_rightunaryop(self, node):
        return self.visit(node.name) + node.op

    def visit_raw(self, node):
        return node.code

    def visit_case(self, node):
        # node.body is an iterable of nodes.
        return ("case " + self.visit(node.cond) + ": {\n"
                + self._indented(node.body) + "\n}\n")

    def visit_if(self, node):
        s = "if(" + self.visit(node.cond) + "){\n"
        s += self._indented(node.body)
        s += "\n}"
        # elifs may be None or empty; each entry is a (cond, body) pair.
        for cond, body in node.elifs or ():
            s += "else if(" + self.visit(cond) + "){\n"
            s += self._indented(body)
            s += "\n}"
        # el is a single node (not a list) when present.
        if node.el is not None:
            s += "else{\n"
            s += indent(self.visit(node.el), self.indent)
            s += "\n}"
        return s + "\n"

    def visit_for(self, node):
        s = "for("
        # The init clause is optional; the separator is always emitted.
        if node.adecl is not None:
            s += self.visit(node.adecl)
        s += ";"
        s += self.visit(node.cond) + ";"
        s += self.visit(node.incr) + "){\n"
        s += self._indented(node.body)
        return s + "\n}\n"

    def visit_funccall(self, node):
        s = self.visit(node.name)
        # Template arguments are optional; call arguments are required.
        if node.targs is not None:
            s += "<" + ",".join(self.visit(a) for a in node.targs) + ">"
        s += "(" + ",".join(self.visit(a) for a in node.args) + ")"
        return s

    def visit_nothing(self, node):
        return ""

    def visit_block(self, node):
        return "".join(self.visit(n) for n in node.nodes)
278 
def resolve_unicode(item):
    """Translate unicode types into string types, if necessary.

    This function exists to support Python 2.7.

    Parameters
    ----------
    item : int or str or list
        The list of items, or item to potentially encode.

    Returns
    -------
    int or str or list
        The same type as was passed to the function, encoded if necessary
    """
    if isinstance(item, str):
        return item
    if isinstance(item, tuple):
        return tuple([resolve_unicode(member) for member in item])
    if isinstance(item, list):
        return [resolve_unicode(member) for member in item]
    # Anything else: encode if the object supports it, otherwise hand it
    # back untouched (e.g. numbers).
    try:
        encoded = item.encode('utf-8')
    except Exception:
        return item
    return encoded
306 
# Load the raw type table from ../share/dbtypes.json relative to this file.
with open(os.path.join(os.path.dirname(__file__), '..', 'share',
                       'dbtypes.json')) as f:
    RAW_TABLE = resolve_unicode(json.load(f))

# Find the greatest HDF5 version string in column 5 and the row range that
# carries it: TABLE_START is the first row where that version appears,
# TABLE_END the last HDF5 row with it.
VERSION = ""
TABLE_START = 0
TABLE_END = 0
for row, current in enumerate(map(tuple, RAW_TABLE)):
    if current[4] != "HDF5":
        continue
    if current[5] > VERSION:
        VERSION = current[5]
        TABLE_START = row
    if current[5] == VERSION:
        TABLE_END = row

TYPES_TABLE = list(map(tuple, RAW_TABLE[TABLE_START:TABLE_END + 1]))

# Lookup tables filled from TYPES_TABLE further down in this module.
CANON_TO_NODE = {}
CANON_SET = set()
DB_TO_CPP = {}
CANON_TO_DB = {}
DB_TO_VL = {}
# NOTE(review): shows as a single space in the listing; whitespace may have
# been collapsed -- confirm.
INDENT = ' '
331 
def convert_canonical(raw_list):
    """Converts JSON list of lists to tuple of tuples.

    Parameters
    ----------
    raw_list : list or str
        List to be converted, or str

    Returns
    -------
    str or tuple
        Converted list, or str
    """
    if not isinstance(raw_list, str):
        # Recurse so nested lists become nested tuples.
        return tuple(map(convert_canonical, raw_list))
    return raw_list
348 
# Fill the lookup tables from every row flagged 1 in column 6 that belongs
# to the HDF5 backend at the selected VERSION.
for row in TYPES_TABLE:
    if not (row[6] == 1 and row[4] == "HDF5" and row[5] == VERSION):
        continue
    db, cpp = row[1], row[2]
    canon = convert_canonical(row[7])
    CANON_SET.add(canon)
    DB_TO_CPP[db] = cpp
    CANON_TO_DB[canon] = db
    CANON_TO_NODE[canon] = Type(cpp=cpp, db=db, canon=canon)
    DB_TO_VL[db] = row[8]
359 
def list_dependencies(canon):
    """Return a list of a type's dependencies, each in canonical form.

    Only the direct children of *canon* are listed; nested children are
    not expanded.

    Parameters
    ----------
    canon : tuple or str
        the canonical form of the type

    Returns
    -------
    list or str
        list of dependencies or str if base type is primitive

    Examples
    --------
    >>> list_dependencies(('PAIR', 'INT', 'VL_STRING'))
    [('PAIR', 'INT', 'VL_STRING'), 'INT', 'VL_STRING']
    >>> list_dependencies('INT')
    'INT'
    """
    if isinstance(canon, str):
        return canon
    # The type itself comes first, followed by its template arguments.
    return [canon] + list(canon[1:])
382 
def get_variable(name, depth=0, prefix=""):
    """Return a C++ variable, appropriately formatted for depth.

    The result is ``name``, then the depth as text, then ``prefix``.

    Parameters
    ----------
    name : str
        Base variable name.
    depth : int, optional
        Depth of variable in relation to depth 0 type.
    prefix : str, optional
        New prefix to add, based on direct parent type.

    Returns
    -------
    str
        Variable name.
    """
    pieces = (name, str(depth), prefix)
    return "".join(pieces)
402 
def get_prefix(base_prefix, parent_type, child_index):
    """Return the prefix of a C++ variable, appropriately formatted for depth.

    The child's role name is looked up in ``template_args`` under the
    parent's container kind (``parent_type.canon[0]``) and appended to
    ``base_prefix``.

    Parameters
    ----------
    base_prefix : str
        Prefix of direct parent type.
    parent_type : Type
        Node of parent type.
    child_index : int
        Index relative to direct parent.

    Returns
    -------
    str
        New prefix.
    """
    container = parent_type.canon[0]
    role = template_args[container][child_index]
    return base_prefix + role
421 
def case_template(t, read_x):
    """Represents C++ case statement.

    The case label is ``t.db`` and a ``break`` statement is appended after
    the supplied body. The caller's node list is copied, so neither
    ``read_x`` nor ``read_x.nodes`` is modified.

    Parameters
    ----------
    t : Type
        Depth 0 type.
    read_x : Node
        Nodes of case statement body: a Block, or a sequence of nodes.

    Returns
    -------
    Node
        Complete case statement block.
    """
    statements = read_x.nodes if isinstance(read_x, Block) else read_x
    # Copy before appending: the original "+=" extended the caller's list
    # in place, so reusing read_x accumulated extra "break" statements.
    body = list(statements) + [ExprStmt(child=Var(name="break"))]
    return Case(cond=Var(name=t.db), body=body)
444 
def primitive_setup(t, depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ primitives.

    Primitives need no setup; an empty Nothing node is returned and the
    arguments are ignored.
    """
    return Nothing()
449 
def string_setup(depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ String.

    Declares the ``size_t nullpos<depth><prefix>`` variable.
    """
    nullpos_name = "nullpos" + str(depth) + prefix
    declaration = Decl(type=Type(cpp="size_t"), name=Var(name=nullpos_name))
    return Block(nodes=[ExprStmt(child=declaration)])
457 
def vl_string_setup(depth=0, prefix=""):
    """HDF5 Query: Represents necessary setup steps for C++ VL_String.

    No setup is needed; a Block holding a single Nothing node is returned
    and the arguments are ignored.
    """
    return Block(nodes=[Nothing()])
463 
# Role names of each container's template arguments, in order. They are
# appended to variable prefixes to keep generated C++ names unique.
template_args = {
    "MAP": ("key", "val"),
    "VECTOR": ("elem",),
    "SET": ("elem",),
    "LIST": ("elem",),
    "PAIR": ("first", "second"),
}

# Containers for which get_setup emits the fieldlen / item_type lookups.
variable_length_types = ["MAP", "LIST", "SET", "VECTOR"]
471 
def get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j'):
    """HDF5 Query: Get nodes representing C++ setup.

    Primitive setups are called directly, while template types are handled
    recursively. Every hid_t variable opened here (field type, item type)
    is pushed onto TEARDOWN_STACK.

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Current prefix, determined by parent type.
    HDF5_type : str
        hid_t type used to access HDF5 methods
    child_index : str or int
        Index into parent type, None if only child

    Returns
    -------
    Block
        Nodes required for type t setup. A Nothing node is returned instead
        for non-primitive types flagged in DB_TO_VL.
    """
    has_index = child_index is not None
    setup_nodes = []

    if has_index:
        # Fetch this member's hid_t from the parent compound type and use
        # it for every lookup below.
        field_type_var = get_variable("fieldtype", depth=depth, prefix=prefix)
        member_lookup = FuncCall(
            name=Raw(code="H5Tget_member_type"),
            args=[Raw(code=HDF5_type), Raw(code=str(child_index))])
        field_type = ExprStmt(child=DeclAssign(
            type=Type(cpp="hid_t"),
            target=Var(name=field_type_var),
            value=member_lookup))
        HDF5_type = field_type_var

    total_size_var = get_variable("total_size", depth=depth, prefix=prefix)
    total_size = ExprStmt(child=DeclAssign(
        type=Type(cpp="unsigned int"),
        target=Var(name=total_size_var),
        value=FuncCall(name=Raw(code="H5Tget_size"),
                       args=[Raw(code=HDF5_type)])))

    if is_primitive(t):
        if t.canon == "STRING":
            setup_nodes.append(string_setup(depth=depth, prefix=prefix))
        elif t.canon == "VL_STRING":
            setup_nodes.append(vl_string_setup(depth=depth, prefix=prefix))
        else:
            setup_nodes.append(primitive_setup(t, depth=depth, prefix=prefix))
        if has_index:
            setup_nodes.append(field_type)
            TEARDOWN_STACK.append(field_type_var)
        setup_nodes.append(total_size)
        return Block(nodes=setup_nodes)

    # Flagged compound types get no setup at all.
    if DB_TO_VL[t.db]:
        return Nothing()

    container = t.canon[0]
    child_types = t.canon[1:]

    if has_index:
        setup_nodes.append(field_type)
        TEARDOWN_STACK.append(field_type_var)
    setup_nodes.append(total_size)

    # Children receive their position as child_index, except the single
    # child of a variable-length container, which receives None.
    pass_index = True
    if container in variable_length_types:
        fieldlen_var = get_variable("fieldlen", depth=depth, prefix=prefix)
        setup_nodes.append(Block(nodes=[
            ExprStmt(child=Decl(type=Type(cpp="hsize_t"),
                                name=Var(name=fieldlen_var))),
            ExprStmt(child=FuncCall(
                name=Raw(code="H5Tget_array_dims2"),
                args=[Raw(code=HDF5_type),
                      Raw(code="&" + fieldlen_var)]))]))

        item_type_var = get_variable("item_type", depth=depth, prefix=prefix)
        setup_nodes.append(ExprStmt(child=DeclAssign(
            type=Type(cpp="hid_t"),
            target=Var(name=item_type_var),
            value=FuncCall(name=Raw(code="H5Tget_super"),
                           args=[Raw(code=HDF5_type)]))))
        TEARDOWN_STACK.append(item_type_var)
        # Children are looked up relative to the element type.
        HDF5_type = item_type_var
        pass_index = len(child_types) > 1

    child_setups = []
    roles = template_args[container]
    for index, (new_type, part) in enumerate(zip(child_types, roles)):
        child_setups.append(get_setup(
            CANON_TO_NODE[new_type],
            depth=depth + 1,
            prefix=prefix + part,
            HDF5_type=HDF5_type,
            child_index=index if pass_index else None))
    setup_nodes.append(Block(nodes=child_setups))

    return Block(nodes=setup_nodes)
599 
def get_decl(t, depth=0, prefix=""):
    """HDF5 Query: Get node representing C++ type declaration.

    Declarations occur directly before bodies, created without recursion.
    The declared variable is named ``x<depth><prefix>``.

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Prefix determined by parent type.

    Returns
    -------
    Node
        Declaration statement as a node.
    """
    declared = Var(name=get_variable("x", depth=depth, prefix=prefix))
    return ExprStmt(child=Decl(type=t, name=declared))
622 
def reinterpret_cast_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents a body using the reinterpret_cast method.

    This includes int, double, float, etc. The generated statement assigns
    ``*reinterpret_cast<T*>(base_offset)`` to ``x<depth><prefix>``.
    """
    target = Var(name=get_variable("x", depth=depth, prefix=prefix))
    cast = FuncCall(name=Raw(code="*reinterpret_cast"),
                    targs=[Raw(code=t.cpp + "*")],
                    args=[Raw(code=base_offset)])
    return Block(nodes=[ExprStmt(child=Assign(target=target, value=cast))])
635 
def string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None):
    """HDF5 Query: Represents body for the C++ String primitive.

    Generates three steps: construct the string from ``total_size`` bytes
    at ``base_offset``, find the first NUL character, and resize the string
    to that position when one was found.

    Parameters
    ----------
    t : Type
        String type node; ``t.cpp`` is used as the constructor name.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Current prefix, determined by parent type.
    base_offset : str, optional
        C++ expression for the start of the data.
    variable : str, optional
        Assignment target; defaults to ``x<depth><prefix>``.

    Returns
    -------
    Block
        Nodes for the string read.
    """
    if variable is None:
        variable = get_variable("x", depth=depth, prefix=prefix)
    nullpos = get_variable("nullpos", depth=depth, prefix=prefix)
    total_size = get_variable("total_size", depth=depth, prefix=prefix)

    construct = ExprStmt(child=Assign(
        target=Var(name=variable),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=base_offset),
                             Raw(code=total_size)])))
    find_null = ExprStmt(child=Assign(
        target=Var(name=nullpos),
        value=BinOp(x=Var(name=variable), op=".",
                    y=FuncCall(name=Raw(code="find"),
                               args=[Raw(code="'\\0'")]))))
    # Only shrink when find() did not return npos.
    found_null = BinOp(x=Var(name=nullpos), op="!=",
                       y=BinOp(x=Raw(code=t.cpp), op="::",
                               y=Raw(code="npos")))
    truncate = If(cond=found_null,
                  body=[ExprStmt(child=BinOp(
                      x=Var(name=variable), op=".",
                      y=FuncCall(name=Raw(code="resize"),
                                 args=[Raw(code=nullpos)])))])
    return Block(nodes=[construct, find_null, truncate])
662 
def vl_string_body(t, depth=0, prefix="", base_offset="buf+offset",
                   variable=None):
    """HDF5 Query: Represents the body for the VL_String primitive.

    Generates ``variable = VLRead<t.cpp,t.db>(base_offset)``; *variable*
    defaults to ``x<depth><prefix>``.
    """
    if variable is None:
        variable = get_variable("x", depth=depth, prefix=prefix)

    read_call = FuncCall(name=Raw(code="VLRead"),
                         args=[Raw(code=base_offset)],
                         targs=[Raw(code=t.cpp), Raw(code=t.db)])
    return Block(nodes=[
        ExprStmt(child=Assign(target=Var(name=variable), value=read_call))])
676 
def uuid_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for the boost::uuid primitive.

    Generates ``memcpy(&x, base_offset, total_size)`` using the variables
    named for this depth and prefix.
    """
    target = get_variable("x", depth=depth, prefix=prefix)
    size = get_variable("total_size", depth=depth, prefix=prefix)

    copy_call = FuncCall(name=Raw(code="memcpy"),
                         args=[Raw(code="&" + target),
                               Raw(code=base_offset),
                               Raw(code=size)])
    return Block(nodes=[ExprStmt(child=copy_call)])
688 
def vl_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for all C++ VL types.

    Generates ``x = VLRead<t.cpp,t.db>(base_offset)``.
    """
    target = Var(name=get_variable("x", depth=depth, prefix=prefix))
    read_call = FuncCall(name=Var(name="VLRead"),
                         args=[Raw(code=base_offset)],
                         targs=[Raw(code=t.cpp), Raw(code=t.db)])
    return Block(nodes=[ExprStmt(child=Assign(target=target,
                                              value=read_call))])
698 
def map_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents the body for C++ map type.

    Generates a loop over ``fieldlen`` entries. Each iteration reads the
    key and the value through get_body and stores ``x[key] = value``.
    Entries are laid out as key followed by value, so entry ``k`` starts at
    ``base_offset + (key_size + value_size) * k``.
    """
    roles = template_args[t.canon[0]]
    child_depth = depth + 1

    map_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)

    key_type = CANON_TO_NODE[t.canon[1]]
    val_type = CANON_TO_NODE[t.canon[2]]

    key_prefix = prefix + roles[0]
    val_prefix = prefix + roles[1]
    key_var = get_variable("x", depth=child_depth, prefix=key_prefix)
    val_var = get_variable("x", depth=child_depth, prefix=val_prefix)
    key_size = get_variable("total_size", depth=child_depth,
                            prefix=key_prefix)
    val_size = get_variable("total_size", depth=child_depth,
                            prefix=val_prefix)

    entry_size = "(" + key_size + "+" + val_size + ")"
    key_offset = base_offset + "+" + entry_size + "*" + counter
    val_offset = key_offset + "+" + key_size

    loop_body = [
        get_body(key_type, depth=child_depth, prefix=key_prefix,
                 base_offset=key_offset),
        get_body(val_type, depth=child_depth, prefix=val_prefix,
                 base_offset=val_offset),
        ExprStmt(child=Assign(
            target=Raw(code=map_var + "[" + key_var + "]"),
            value=Raw(code=val_var))),
    ]
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=loop_body)
    return Block(nodes=[loop])
735 
def pair_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body for C++ pair type.

    Reads the first member at ``base_offset``, the second right after it
    (offset by the first member's total size), then assigns
    ``std::make_pair(first, second)`` to ``x``.
    """
    roles = template_args[t.canon[0]]
    child_depth = depth + 1
    pair_var = get_variable("x", depth=depth, prefix=prefix)

    first_type = CANON_TO_NODE[t.canon[1]]
    second_type = CANON_TO_NODE[t.canon[2]]

    first_prefix = prefix + roles[0]
    second_prefix = prefix + roles[1]
    first_var = get_variable("x", depth=child_depth, prefix=first_prefix)
    second_var = get_variable("x", depth=child_depth, prefix=second_prefix)

    first_size = get_variable("total_size", depth=child_depth,
                              prefix=first_prefix)
    # Computed for parity with the first member; not used in any offset.
    second_size = get_variable("total_size", depth=child_depth,
                               prefix=second_prefix)

    second_offset = base_offset + "+" + first_size

    read_first = get_body(first_type, depth=child_depth,
                          prefix=first_prefix, base_offset=base_offset)
    read_second = get_body(second_type, depth=child_depth,
                           prefix=second_prefix, base_offset=second_offset)
    combine = ExprStmt(child=Assign(
        target=Raw(code=pair_var),
        value=FuncCall(name=Raw(code="std::make_pair"),
                       args=[Raw(code=first_var),
                             Raw(code=second_var)])))
    return Block(nodes=[read_first, read_second, combine])
763 
def vector_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<primitive> types.

    Generates two statements: construct the vector with ``fieldlen``
    elements, then ``memcpy`` ``total_size`` bytes from ``base_offset``
    into ``&x[0]``.
    """
    x = get_variable("x", depth=depth, prefix=prefix)
    fieldlen = get_variable("fieldlen", depth=depth, prefix=prefix)
    total_size = get_variable("total_size", depth=depth, prefix=prefix)

    vector_start = "&" + x + "[0]"

    allocate = ExprStmt(child=Assign(
        target=Var(name=x),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=fieldlen)])))
    fill = ExprStmt(child=FuncCall(
        name=Var(name="memcpy"),
        args=[Raw(code=vector_start),
              Raw(code=base_offset),
              Raw(code=total_size)]))
    return Block(nodes=[allocate, fill])
782 
def vector_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<non-primitive> types.

    Constructs the vector with ``fieldlen`` elements, then loops over them:
    each element is read through get_body at
    ``base_offset + child_size * k`` and assigned to ``x[k]``.
    """
    vec_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)
    slot = vec_var + "[" + counter + "]"

    elem_prefix = get_prefix(prefix, t, 0)
    elem_var = get_variable("x", depth=depth + 1, prefix=elem_prefix)
    elem_size = get_variable("total_size", depth=depth + 1,
                             prefix=elem_prefix)
    elem_offset = base_offset + "+" + elem_size + "*" + counter

    allocate = ExprStmt(child=Assign(
        target=Raw(code=vec_var),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=length)])))
    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth + 1,
                 prefix=elem_prefix, base_offset=elem_offset),
        ExprStmt(child=Assign(target=Var(name=slot),
                              value=Raw(code=elem_var))),
    ]
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=loop_body)
    return Block(nodes=[allocate, loop])
813 
def vec_string_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ Vector<std::string> types.

    Constructs the vector with ``fieldlen`` elements, then loops over them
    and lets string_body read each string directly into ``x[k]``.
    """
    vec_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    slot = vec_var + "[" + counter + "]"
    length = get_variable("fieldlen", depth=depth, prefix=prefix)

    elem_prefix = get_prefix(prefix, t, 0)
    elem_size = get_variable("total_size", depth=depth + 1,
                             prefix=elem_prefix)
    elem_offset = base_offset + "+" + elem_size + "*" + counter

    allocate = ExprStmt(child=Assign(
        target=Raw(code=vec_var),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=length)])))
    # The string is written straight into the vector slot, so no separate
    # element variable is involved.
    read_string = string_body(CANON_TO_NODE[t.canon[1]], depth=depth + 1,
                              prefix=elem_prefix, base_offset=elem_offset,
                              variable=slot)
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=[read_string])
    return Block(nodes=[allocate, loop])
840 
def set_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<primitive> types.

    Casts ``base_offset`` to a pointer to the element type (``xraw``) and
    constructs the set from the range ``[xraw, xraw + fieldlen)``.
    """
    set_var = get_variable("x", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)
    elem_prefix = get_prefix(prefix, t, 0)
    raw_ptr = get_variable("xraw", depth=depth + 1, prefix=elem_prefix)

    ptr_type = CANON_TO_NODE[t.canon[1]].cpp + "*"

    declare_ptr = ExprStmt(child=DeclAssign(
        type=Type(cpp=ptr_type),
        target=Var(name=raw_ptr),
        value=FuncCall(name=Raw(code="reinterpret_cast"),
                       targs=[Raw(code=ptr_type)],
                       args=[Raw(code=base_offset)])))
    build_set = ExprStmt(child=Assign(
        target=Var(name=set_var),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=raw_ptr),
                             Raw(code=raw_ptr + "+" + length)])))
    return Block(nodes=[declare_ptr, build_set])
866 
def set_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<non-primitive> types.

    Loops over ``fieldlen`` elements; each is read through get_body at
    ``base_offset + item_size * k`` and inserted into ``x``.
    """
    set_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)

    elem_prefix = get_prefix(prefix, t, 0)
    elem_var = get_variable("x", depth=depth + 1, prefix=elem_prefix)
    elem_size = get_variable("total_size", depth=depth + 1,
                             prefix=elem_prefix)
    elem_offset = base_offset + "+" + elem_size + "*" + counter

    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth + 1,
                 prefix=elem_prefix, base_offset=elem_offset),
        ExprStmt(child=FuncCall(name=Raw(code=set_var + ".insert"),
                                args=[Raw(code=elem_var)])),
    ]
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=loop_body)
    return Block(nodes=[loop])
893 
def set_string_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ set<std::string> types.

    Loops over ``fieldlen`` elements; each string is read by string_body
    at ``base_offset + string_size * k`` and inserted into ``x``.
    """
    set_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)

    elem_prefix = get_prefix(prefix, t, 0)
    elem_size = get_variable("total_size", depth=depth + 1,
                             prefix=elem_prefix)
    elem_var = get_variable("x", depth=depth + 1, prefix=elem_prefix)
    elem_offset = base_offset + "+" + elem_size + "*" + counter

    loop_body = [
        string_body(CANON_TO_NODE[t.canon[1]], depth=depth + 1,
                    prefix=elem_prefix, base_offset=elem_offset),
        ExprStmt(child=FuncCall(name=Raw(code=set_var + ".insert"),
                                args=[Raw(code=elem_var)])),
    ]
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=loop_body)
    return Block(nodes=[loop])
920 
def list_primitive_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ list<primitive> types.

    Casts ``base_offset`` to a pointer to the element type (``xraw``) and
    constructs the list from the range ``[xraw, xraw + fieldlen)``.
    """
    list_var = get_variable("x", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)
    elem_prefix = get_prefix(prefix, t, 0)
    raw_ptr = get_variable("xraw", depth=depth + 1, prefix=elem_prefix)

    ptr_type = CANON_TO_NODE[t.canon[1]].cpp + "*"

    declare_ptr = ExprStmt(child=DeclAssign(
        type=Type(cpp=ptr_type),
        target=Var(name=raw_ptr),
        value=FuncCall(name=Raw(code="reinterpret_cast"),
                       targs=[Raw(code=ptr_type)],
                       args=[Raw(code=base_offset)])))
    build_list = ExprStmt(child=Assign(
        target=Var(name=list_var),
        value=FuncCall(name=Raw(code=t.cpp),
                       args=[Raw(code=raw_ptr),
                             Raw(code=raw_ptr + "+" + length)])))
    return Block(nodes=[declare_ptr, build_list])
945 
def list_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Represents body of C++ list<non-primitive> types.

    Loops over ``fieldlen`` elements; each is read through get_body at
    ``base_offset + item_size * k`` and appended with ``push_back``.
    """
    list_var = get_variable("x", depth=depth, prefix=prefix)
    counter = get_variable("k", depth=depth, prefix=prefix)
    length = get_variable("fieldlen", depth=depth, prefix=prefix)

    elem_prefix = get_prefix(prefix, t, 0)
    elem_var = get_variable("x", depth=depth + 1, prefix=elem_prefix)
    elem_size = get_variable("total_size", depth=depth + 1,
                             prefix=elem_prefix)
    elem_offset = base_offset + "+" + elem_size + "*" + counter

    loop_body = [
        get_body(CANON_TO_NODE[t.canon[1]], depth=depth + 1,
                 prefix=elem_prefix, base_offset=elem_offset),
        ExprStmt(child=FuncCall(name=Raw(code=list_var + ".push_back"),
                                args=[Raw(code=elem_var)])),
    ]
    loop = For(
        adecl=DeclAssign(type=Type(cpp="unsigned int"),
                         target=Var(name=counter),
                         value=Raw(code="0")),
        cond=BinOp(x=Var(name=counter), op="<", y=Var(name=length)),
        incr=LeftUnaryOp(op="++", name=Var(name=counter)),
        body=loop_body)
    return Block(nodes=[loop])
968 
# Body generators used by get_body. Keys are either a database type name
# (looked up first) or a container kind from the head of the canonical
# form (fallback).
BODIES = {
    # primitives
    "INT": reinterpret_cast_body,
    "DOUBLE": reinterpret_cast_body,
    "FLOAT": reinterpret_cast_body,
    "BOOL": reinterpret_cast_body,
    "UUID": uuid_body,
    "STRING": string_body,
    "VL_STRING": vl_body,
    "BLOB": vl_body,
    # containers
    "VECTOR_STRING": vec_string_body,
    "MAP": map_body,
    "PAIR": pair_body,
    "LIST_INT": list_primitive_body,
    "LIST_DOUBLE": list_primitive_body,
    "LIST_FLOAT": list_primitive_body,
    "LIST": list_body,
    "SET_INT": set_primitive_body,
    "SET_DOUBLE": set_primitive_body,
    "SET_FLOAT": set_primitive_body,
    "SET": set_body,
    "VECTOR_INT": vector_primitive_body,
    "VECTOR_DOUBLE": vector_primitive_body,
    "VECTOR_FLOAT": vector_primitive_body,
    "VECTOR": vector_body,
}
992 
def get_body(t, depth=0, prefix="", base_offset="buf+offset"):
    """HDF5 Query: Get body nodes for a C++ type.

    The result is the declaration of ``x<depth><prefix>`` followed by the
    read code from the matching generator. Generators are chosen in this
    order: BODIES by database name for primitives, vl_body for types
    flagged in DB_TO_VL, BODIES by database name, then BODIES by container
    kind (``t.canon[0]``).

    Parameters
    ----------
    t : Type
        C++ type, canonical form.
    depth : int, optional
        Depth relative to initial, depth 0 type.
    prefix : str, optional
        Current prefix, determined by parent type.
    base_offset : str, optional
        C++ expression for the start of this value's data.

    Returns
    -------
    Node
        Body nodes required for the type.

    Raises
    ------
    ValueError
        If no generator is registered for a non-primitive type.
    KeyError
        If a primitive type's database name is missing from BODIES.
    """
    block = [get_decl(t, depth=depth, prefix=prefix)]

    # The original branched on depth == 0 for primitives, but both branches
    # were identical, so a single lookup is enough.
    if is_primitive(t):
        make_body = BODIES[t.db]
    elif DB_TO_VL[t.db]:
        make_body = vl_body
    elif t.db in BODIES:
        make_body = BODIES[t.db]
    elif t.canon[0] in BODIES:
        make_body = BODIES[t.canon[0]]
    else:
        raise ValueError("No generation specified for type " + t.db)

    block.append(make_body(t, depth=depth, prefix=prefix,
                           base_offset=base_offset))
    return Block(nodes=block)
1031 
# teardown functions

# Names of the hid_t variables opened by get_setup; normal_close pops them
# and emits one H5Tclose call per name.
TEARDOWN_STACK = []
VARS = []
1036 
def normal_close(t):
    """Build the generic closing nodes of an HDF5 type code block.

    The generated statements assign CmpConds<t.cpp>(...) on the depth-0
    "x" variable to is_row_selected, copy that variable into row[j] when
    the row is selected, and finally call H5Tclose on every name popped
    off TEARDOWN_STACK (the stack is empty afterwards).

    Parameters
    ----------
    t : Type
        Type node whose cpp attribute is used as the template argument.

    Returns
    -------
    Block
        The closing statements.
    """
    x_var = get_variable("x", depth=0, prefix="")

    compare = FuncCall(name=Var(name="CmpConds"),
                       targs=[Raw(code=t.cpp)],
                       args=[Raw(code="&"+x_var),
                             Raw(code="&(field_conds[qr.fields[j]])")])
    select = ExprStmt(child=Assign(target=Var(name="is_row_selected"),
                                   value=compare))
    store = If(cond=Var(name="is_row_selected"),
               body=[ExprStmt(child=Assign(target=Var(name="row[j]"),
                                           value=Var(name=x_var)))])
    tree = Block(nodes=[select, store])

    # Close handles in last-in, first-out order.
    while TEARDOWN_STACK:
        handle = TEARDOWN_STACK.pop()
        tree.nodes.append(ExprStmt(child=FuncCall(
            name=Var(name="H5Tclose"),
            args=[Raw(code=handle)])))
    return tree
1057 
def get_teardown(t):
    """HDF5 Query: Return the teardown nodes for type t.

    Every type currently shares the generic close built by normal_close.
    """
    teardown = normal_close(t)
    return teardown
1060 
def indent(text, prefix, predicate=None):
    """Prefix selected lines of 'text' with 'prefix'.

    Copied in behavior from the textwrap library, version 3.3. A line
    receives the prefix when 'predicate(line)' is true. Without a
    predicate, every line that is neither empty nor made up solely of
    whitespace is prefixed. Line endings are kept as they are.
    """
    if predicate is None:
        predicate = lambda line: line.strip()
    return ''.join(prefix + line if predicate(line) else line
                   for line in text.splitlines(True))
1078 
def typeid(t):
    """Return a FuncCall node for C++ typeid(...) of a canonical type.

    t is looked up in CANON_TO_NODE and the node's cpp attribute becomes
    the single argument of the call.
    """
    cpp_type = CANON_TO_NODE[t].cpp
    return FuncCall(name=Raw(code="typeid"), args=[Raw(code=cpp_type)])
1082 
def no_vl(t):
    """Return True when neither t nor any type nested in it is flagged VL.

    A type flagged in DB_TO_VL is rejected immediately, a primitive is
    accepted, and a container is accepted only if all of its template
    arguments (t.canon[1:]) are accepted.
    """
    if DB_TO_VL[t.db]:
        return False
    if is_primitive(t):
        return True
    return all(no_vl(CANON_TO_NODE[child]) for child in t.canon[1:])
1094 
def get_dim_shape(canon, start=0, depth=0):
    """Number the entries of a canonical type, mirroring its nesting.

    Parameters
    ----------
    canon : str or sequence
        Canonical type; a string or a nested sequence of strings.
    start : int, optional
        Index given to the first entry.
    depth : int, optional
        Recursive depth counter.

    Returns
    -------
    count : int
        Number of string entries found.
    shape : list or int
        Indices nested like canon. A lone string yields a one-element
        list at depth 0 and a bare index at any other depth.
    """
    if isinstance(canon, str):
        return (1, [start]) if depth == 0 else (1, start)
    count = 0
    shape = []
    for member in canon:
        used, member_shape = get_dim_shape(member, start=start + count,
                                           depth=depth + 1)
        count += used
        shape.append(member_shape)
    return count, shape
1111 
def flatten(canon):
    """Flatten a canonical type, listing each type before its arguments.

    A string is returned as a one-element list. Otherwise the result is a
    tuple that starts with canon itself; every nested (non-string) entry
    is kept in place and immediately followed by its own arguments, i.e.
    everything after its first element.
    """
    if isinstance(canon, str):
        return [canon]
    flat = list(canon)
    # The container name is replaced by the complete type.
    flat[0] = canon
    pos = 1
    while pos < len(flat):
        entry = flat[pos]
        pos += 1
        if not isinstance(entry, str):
            # Splice the nested type's arguments in right behind it; they
            # are visited by the following iterations.
            flat[pos:pos] = entry[1:]
    return tuple(flat)
1127 
def get_variation_cond(t):
    """HDF5 Create: Generate C++ if-statement condition for a given type.

    The condition is a chain of boolean expressions of the form
    'shape[n]<1' or 'shape[n]>=1' joined by '&&' operators, where n is an
    index into the C++ shape array. 'shape[n]<1' is emitted for an entry
    that is variable length, 'shape[n]>=1' for a fixed length entry that
    could have been variable length. For instance, a type of
    VL_MAP_VL_STRING_VL_STRING would receive the condition
    'shape[0]<1 && shape[1]<1 && shape[2]<1'. When every entry that could
    be variable length is variable length, 'shape.empty() ||' is put in
    front of the chain.

    Parameters
    ----------
    t : Type
        C++ type for the boolean condition.

    Returns
    -------
    condition : BinOp
        Node representing the boolean condition.
    """
    def shape_test(position, op):
        # One 'shape[position] <op> 1' comparison.
        return BinOp(x=Raw(code="shape["+str(position)+"]"),
                     op=op, y=Raw(code="1"))

    shape_len = get_dim_shape(t.canon)[0]
    vl_total = 0
    vl_possible = 0
    tests = []
    for sub_type, position in zip(flatten(t.canon), range(shape_len)):
        if DB_TO_VL[CANON_TO_NODE[sub_type].db]:
            # This type is VL
            vl_total += 1
            vl_possible += 1
            tests.append(shape_test(position, "<"))
            continue
        # Find out if type could be VL
        origin = ORIGIN_DICT[sub_type]
        if is_primitive(CANON_TO_NODE[origin]):
            could_be_vl = bool(VARIATION_DICT[origin])
        else:
            could_be_vl = origin[0] in variable_length_types
        if could_be_vl:
            vl_possible += 1
            tests.append(shape_test(position, ">="))

    condition = tests[0]
    for extra in tests[1:]:
        condition = BinOp(x=condition, op="&&", y=extra)

    if vl_total == vl_possible:
        condition = BinOp(x=Raw(code="shape.empty()"), op="||",
                          y=condition)
    return condition
1186 
def VL_ADD_BLOCK(t, item_var):
    """Build the C++ if-statement that registers a VL datatype once.

    When vldts_.count(t.db) is 0, the generated code stores
    H5Tvlen_create(item_var) in vldts_[t.db] and inserts that entry into
    opened_types_.

    Parameters
    ----------
    t : Type
        Type whose db name keys the vldts_ entry.
    item_var : str
        C++ variable passed to H5Tvlen_create.

    Returns
    -------
    If
        The guarding if-statement node.
    """
    vldt_entry = "vldts_["+t.db+"]"
    not_registered = BinOp(x=FuncCall(name=Raw(code="vldts_.count"),
                                      args=[Raw(code=t.db)]),
                           op="==",
                           y=Raw(code="0"))
    create = ExprStmt(child=BinOp(
        x=Raw(code=vldt_entry),
        op="=",
        y=Raw(code="H5Tvlen_create("+item_var+")")))
    track = ExprStmt(child=FuncCall(
        name=Raw(code="opened_types_.insert"),
        args=[Raw(code=vldt_entry)]))
    return If(cond=not_registered, body=[create, track])
1200 
def print_statement(t, identifier):
    """Generate a C++ std::cerr statement for debugging generated code.

    The printed message is '<t.db>: got here: <identifier>'.
    """
    message = t.db + ": got here: " + str(identifier)
    code = "std::cerr<<\"" + message + "\" << std::endl"
    return ExprStmt(child=Raw(code=code))
1206 
def get_variation_body(t):
    """HDF5 Create: Generate C++ if-statement body for a given type.

    Called in coordination with get_variation_cond. For a given C++ type,
    this function returns the C++ statements that set dbtypes[i],
    dst_sizes[i] and field_types[i] and build the HDF5 version of the type.

    Parameters
    ----------
    t : Type
        C++ type for which to create an if-statement body.

    Returns
    -------
    body : Block or Raw
        Node containing the C++ statements for HDF5 creation; the Raw node
        from RAW_TYPES when t.db is listed there.
    """
    # Types with non-standard bodies are handled directly.
    if t.db in RAW_TYPES:
        return RAW_TYPES[t.db]

    def assign(target, value):
        # C++ statement 'target=value'.
        return ExprStmt(child=BinOp(x=Raw(code=target),
                                    op="=",
                                    y=Raw(code=value)))

    def track(handle):
        # C++ statement 'opened_types_.insert(handle)'.
        return ExprStmt(child=FuncCall(
            name=Raw(code="opened_types_.insert"),
            args=[Raw(code=handle)]))

    statements = [ExprStmt(child=Raw(code="dbtypes[i]="+ t.db))]

    item_nodes, opened_types = get_item_type(t)
    statements.append(item_nodes)
    if opened_types:
        # The last opened type is the type of the whole field.
        type_var = opened_types[-1]
    else:
        type_var = get_variable("item_type", prefix="", depth=0)

    variable_length = bool(DB_TO_VL[t.db])
    statements.append(assign("dst_sizes[i]",
                             get_item_size(t, vl_flag=variable_length)))

    if variable_length:
        if is_primitive(t):
            default_item_var = type_var
        elif opened_types:
            default_item_var = get_variable("item_type", prefix="", depth=1)
        else:
            item_prefix = template_args[VL_TO_FL_CONTAINERS[t.canon[0]]][0]
            default_item_var = get_variable("item_type",
                                            prefix=item_prefix,
                                            depth=1)
        statements.append(assign("field_types[i]", "sha1_type_"))
        statements.append(VL_ADD_BLOCK(t, default_item_var))
    else:
        statements.append(assign("field_types[i]", type_var))
        for opened in opened_types[:-1]:
            statements.append(track(opened))
        if not is_primitive(t):
            statements.append(track("field_types[i]"))
    return Block(nodes=statements)
1272 
# C++ expression of the HDF5 type for each primitive db type. The STRING
# entry is a template whose {size} field is filled in by get_item_type.
HDF5_PRIMITIVES = {
    "INT": "H5T_NATIVE_INT",
    "DOUBLE": "H5T_NATIVE_DOUBLE",
    "FLOAT": "H5T_NATIVE_FLOAT",
    "BOOL": "H5T_NATIVE_CHAR",
    "STRING": "CreateFLStrType({size})",
    "BLOB": "sha1_type_",
    "UUID": "uuid_type_",
}

# C++ size expression for each primitive db type with a fixed size.
PRIMITIVE_SIZES = {
    "INT": "sizeof(int)",
    "DOUBLE": "sizeof(double)",
    "FLOAT": "sizeof(float)",
    "BOOL": "sizeof(char)",
    "VL_STRING": "CYCLUS_SHA1_SIZE",
    "BLOB": "CYCLUS_SHA1_SIZE",
    "UUID": "CYCLUS_UUID_SIZE",
}

# Variable length container name -> fixed length container name.
VL_TO_FL_CONTAINERS = {
    "VL_VECTOR": "VECTOR",
    "VL_SET": "SET",
    "VL_LIST": "LIST",
    "VL_MAP": "MAP",
}
1293 
def get_item_type(t, shape_array=None, vl_flag=False, prefix="", depth=0):
    """HDF5 Create: Build specified HDF5 type, recursively if necessary.

    HDF5 types are Primitive, Compound, or Array. We handle each of these
    cases here differently. Primitives are immediately returned by querying
    the HDF5_PRIMITIVES dictionary. Compound types are made up of multiple
    Primitive or Compound types, so each of these child types must be
    declared and created before the parent type can be created. This is
    accomplished via recursion over every child type in the type's canon.
    It should be noted that Compound types depend heavily on the size of
    those types they contain, and this function relies on get_item_size for
    that information. Finally, Arrays can contain one Primitive or Compound
    type, and are created by specifying this child type, Array dimensions,
    and the Array length.

    Parameters
    ----------
    t : Type
        Type node representing C++ type
    shape_array : list, optional
        Dimensioned list of current type shape; computed from t.canon with
        get_dim_shape when omitted (initial call).
    vl_flag : bool, optional
        Handed down to child types and to get_item_size. When true, a
        STRING primitive is given the sha1_type_ HDF5 type. It is forced
        to True for the children of a type flagged in DB_TO_VL.
    prefix : str, optional
        Used to name C++ variables throughout multiple levels of recursion
    depth : int, optional
        Recursive depth counter

    Returns
    -------
    node : Block
        Cumulative collection of nodes necessary for specified item type
    opened_stack : list
        Cumulative collection of opened HDF5 types which must eventually be
        closed
    """
    # We need to keep a persistent shape array, unless initial call.
    if shape_array is None:
        dim_shape = get_dim_shape(t.canon)[1]
    else:
        dim_shape = shape_array
    node = Block(nodes=[])
    opened_stack = []
    type_var = get_variable("item_type", prefix=prefix, depth=depth)
    node.nodes.append(ExprStmt(child=Decl(type=Type(cpp="hid_t"),
                                          name=Var(name=type_var))))

    # Handle primitives
    if isinstance(t.canon, str):
        if DB_TO_VL[t.db] or (t.canon == "STRING" and vl_flag):
            type_code = "sha1_type_"
        else:
            type_code = HDF5_PRIMITIVES[t.db].format(
                size="shape[" + str(dim_shape[0]) + "]")
        node.nodes.append(ExprStmt(child=Assign(target=Var(name=type_var),
                                                value=Raw(code=type_code))))
        return node, opened_stack

    # Handle dependent types
    container_type = t.canon[0]
    canon_shape = list(zip(t.canon, dim_shape))
    is_vl = vl_flag
    # A fixed length container of variable_length_types becomes an HDF5
    # array whose length is read from the shape array.
    is_fixed_array = False
    if DB_TO_VL[t.db]:
        container_type = VL_TO_FL_CONTAINERS[t.canon[0]]
        is_vl = True
    elif t.canon[0] in variable_length_types:
        is_fixed_array = True
        shape_var = get_variable("shape0", prefix="", depth=depth+1)
        node.nodes.append(ExprStmt(
            child=DeclAssign(
                type=Type(cpp="hsize_t"),
                target=Var(name=shape_var),
                value=Raw(code="shape[" + str(dim_shape[0]) + "]"))))

    if len(canon_shape[1:]) == 1:
        # Not a compound type.
        item_canon, item_shape = canon_shape[1]
        # Get nodes initializing our child type
        child_array = (item_shape if isinstance(item_shape, list)
                       else [item_shape])
        new_prefix = template_args[container_type][0]
        child_node, child_opened = get_item_type(CANON_TO_NODE[item_canon],
                                                 shape_array=child_array,
                                                 vl_flag=is_vl,
                                                 prefix=new_prefix,
                                                 depth=depth+1)
        node.nodes.append(child_node)
        opened_stack.extend(child_opened)
        item_var = get_variable("item_type", prefix=new_prefix,
                                depth=depth+1)
    else:
        # This is a compound type.
        child_dict = OrderedDict()
        # 1. Get all child item type nodes, recursively.
        for i in range(1, len(canon_shape)):
            item_canon, item_shape = canon_shape[i]
            item_node = CANON_TO_NODE[item_canon]
            pre_opened_len = len(opened_stack)
            child_array = (item_shape if isinstance(item_shape, list)
                           else [item_shape])
            new_prefix = template_args[container_type][i-1]
            child_node, child_opened = get_item_type(item_node,
                                                     shape_array=child_array,
                                                     vl_flag=is_vl,
                                                     prefix=new_prefix,
                                                     depth=depth+1)
            node.nodes.append(child_node)
            opened_stack.extend(child_opened)

            # if the previous opened stack and current stack are the same,
            # we know that the child is a primitive, and we can generate
            # its variable accordingly.
            if len(opened_stack) == pre_opened_len:
                child_item_var = get_variable("item_type",
                                              prefix=new_prefix,
                                              depth=depth+1)
            # However, if the current opened stack is longer, the first new
            # variable there will be our child variable.
            else:
                child_item_var = opened_stack[pre_opened_len]
            # 2. Get item sizes.
            child_dict[child_item_var] = get_item_size(item_node,
                                                       child_array,
                                                       vl_flag=is_vl,
                                                       depth=depth+1)
        # 3. Create compound type using total item size.
        compound = hdf5_create_compound(list(child_dict.values()))

        item_var = get_variable("item_type", prefix="", depth=depth+1)
        node.nodes.append(ExprStmt(child=Decl(type=Type(cpp="hid_t"),
                                              name=Raw(code=item_var))))
        node.nodes.append(ExprStmt(child=Assign(target=Raw(code=item_var),
                                                value=compound)))
        opened_stack.append(item_var)
        # 4. Insert individual children into the compound type.
        node.nodes.append(hdf5_insert(container_type, item_var, child_dict))

    if is_fixed_array:
        array_node = ExprStmt(child=Assign(target=Var(name=type_var),
                                           value=hdf5_array_create(
                                               item_var,
                                               rank=1,
                                               dims="&"+shape_var)))
        opened_stack.append(type_var)
        node.nodes.append(array_node)
    return node, opened_stack
1447 
def get_item_size(t, shape_array=None, vl_flag=False, depth=0):
    """Resolves item size recursively.

    We can dig down into a type until we reach eventual primitives, and
    then multiply the known sizes of those primitives by the lengths of
    their containers. Container length is defined in the C++ shape array.

    Parameters
    ----------
    t : Type
        The type whose size is in question
    shape_array : list, optional
        Dimensioned list of shape array indices, same shape as t.canon;
        computed with get_dim_shape when omitted.
    vl_flag : bool, optional
        When true, a primitive without an entry in PRIMITIVE_SIZES is
        sized as CYCLUS_SHA1_SIZE instead of by its shape entry. Handed
        down unchanged to child types.
    depth : int, optional
        Recursive depth counter

    Returns
    -------
    size : str
        String of C++ expression representing t's size.
    """
    if shape_array is None:
        shape_array = get_dim_shape(t.canon)[1]

    if is_primitive(t):
        if t.db in PRIMITIVE_SIZES:
            return PRIMITIVE_SIZES[t.db]
        if vl_flag:
            return "CYCLUS_SHA1_SIZE"
        return "shape[" + str(shape_array[0]) + "]"

    if DB_TO_VL[t.db]:
        return "(CYCLUS_SHA1_SIZE)"

    def child_size(index):
        # Size expression of the template argument at t.canon[index].
        child_array = shape_array[index]
        if not isinstance(child_array, list):
            child_array = [child_array]
        return get_item_size(CANON_TO_NODE[t.canon[index]], child_array,
                             vl_flag=vl_flag, depth=depth+1)

    if len(t.canon[1:]) > 1:
        inner = "+".join([child_size(i) for i in range(1, len(t.canon))])
    else:
        inner = child_size(1)
    size = "((" + inner + ")"
    if t.canon[0] in variable_length_types:
        # Multiply by the container length taken from the shape array.
        size += "*" + "shape[" + str(shape_array[0]) + "]"
    return size + ")"
1508 
def hdf5_array_create(item_variable, rank=1, dims="&shape0"):
    """Build the node for a C++ H5Tarray_create2 call.

    Parameters
    ----------
    item_variable : str
        Variable name of HDF5 array item.
    rank : int, optional
        Number of HDF5 array dimensions.
    dims : str, optional
        Variable (by reference) of shape array belonging to HDF5 array

    Returns
    -------
    FuncCall
        H5Tarray_create2(item_variable, rank, dims) function call node.
    """
    call_args = [Raw(code=code) for code in (item_variable, str(rank), dims)]
    return FuncCall(name=Var(name="H5Tarray_create2"), args=call_args)
1530 
def hdf5_create_compound(sizes):
    """Build the node for the C++ HDF5 compound type creation call.

    Parameters
    ----------
    sizes : list
        List of type sizes, all must be str type. They are joined with
        '+' to form the total size of the compound.

    Returns
    -------
    FuncCall
        H5Tcreate(H5T_COMPOUND, <total size>) function call node.
    """
    total_size = Raw(code="+".join(sizes))
    return FuncCall(name=Var(name="H5Tcreate"),
                    args=[Raw(code="H5T_COMPOUND"), total_size])
1547 
def hdf5_insert(container_type, compound_var, types_sizes_dict):
    """Build the nodes for the C++ H5Tinsert calls of a compound type.

    One call is generated per member, in dictionary order. Each member is
    named after the matching entry of template_args[container_type] and
    placed at an offset equal to the summed sizes of the members before it.

    Parameters
    ----------
    container_type : str
        Should be a key in the template_args dict
    compound_var : str
        C++ variable to which the function should refer
    types_sizes_dict : dict
        Dictionary of C++ type variables mapped to their size in memory

    Returns
    -------
    node : Block
        Cumulative nodes for H5Tinsert function
    """
    node = Block(nodes=[])
    offset = str(0)
    members = list(types_sizes_dict.items())
    for position, (type_var, type_size) in enumerate(members):
        descriptor = "\"" + template_args[container_type][position] + "\""
        call = FuncCall(name=Var(name="H5Tinsert"),
                        args=[Raw(code=compound_var),
                              Raw(code=descriptor),
                              Raw(code=offset),
                              Raw(code=type_var)])
        node.nodes.append(ExprStmt(child=call))
        # The next member starts right after this one.
        offset += "+" + type_size
    return node
1584 
def main_query():
    """HDF5 Query: Generate Query case statement code.

    For every canonical type in CANON_SET the setup, body and teardown
    nodes are generated (in that order), wrapped with case_template and
    rendered to C++ with CppGen.

    Returns
    -------
    str
        The concatenated case statements, indented by INDENT * 5.
    """
    CPPGEN = CppGen()
    cases = []
    for canon in CANON_SET:
        type_node = CANON_TO_NODE[canon]
        setup = get_setup(type_node)
        body = get_body(type_node)
        teardown = get_teardown(type_node)
        read_x = Block(nodes=[setup, body, teardown])
        cases.append(CPPGEN.visit(case_template(type_node, read_x)))
    return indent("".join(cases), INDENT * 5)
1598 
# Module state (re)built by main_create.
NOT_VL = []
VARIATION_DICT = {}
ORIGIN_DICT = {}

# Fallback statement for column types without HDF5 support.
io_error = Raw(code="".join([
    "throw IOError(\"the type for column \'\"+",
    "std::string(field_names[i])+\"\' is not yet supported ",
    "in HDF5.\");",
]))

# Hand-written creation code for fixed length strings.
raw_string = Raw(code="".join([
    "dbtypes[i]=STRING;\n",
    "field_types[i]=H5Tcopy(H5T_C_S1);\n",
    "H5Tset_size(field_types[i], shape[0]);\n",
    "H5Tset_strpad(field_types[i], H5T_STR_NULLPAD);\n",
    "opened_types_.insert(field_types[i]);\n",
    "dst_sizes[i]=sizeof(char)*shape[0];\n",
]))

# Hand-written creation code for blobs.
raw_blob = Raw(code="".join([
    "dbtypes[i]=BLOB;\n",
    "field_types[i]=sha1_type_;\n",
    "dst_sizes[i]=CYCLUS_SHA1_SIZE;\n",
]))

# db types whose creation code is emitted verbatim by get_variation_body.
RAW_TYPES = {
    "STRING": raw_string,
    "BLOB": raw_blob,
}

DEBUG_TYPES = ["VECTOR_STRING"]
1622 
def main_create():
    """HDF5 Create: Generate CreateTable if-statements.

    Rebuilds the module-level NOT_VL, VARIATION_DICT and ORIGIN_DICT and
    renders one if/else-if chain over 'valtype': one branch per key of
    VARIATION_DICT, each holding the conditions and bodies of that type's
    variations, with io_error as the final else.

    Returns
    -------
    str
        Generated C++ code, indented by INDENT.
    """
    global NOT_VL
    global VARIATION_DICT
    global ORIGIN_DICT
    CPPGEN = CppGen()

    fixed_length_types = set(c for c in CANON_SET if no_vl(CANON_TO_NODE[c]))

    # Map each fixed length type onto the types that share its C++ type
    # but are stored under a different db name.
    VARIATION_DICT = {}
    for fixed in fixed_length_types:
        fixed_node = CANON_TO_NODE[fixed]
        VARIATION_DICT[fixed] = [
            other for other in (CANON_TO_NODE[c] for c in CANON_SET)
            if other.cpp == fixed_node.cpp and other.db != fixed_node.db]

    VARIATION_DICT['BLOB'] = []
    VARIATION_DICT['STRING'] = [CANON_TO_NODE['VL_STRING']]

    # Every type, and every one of its variations, points at its origin.
    for origin, variants in VARIATION_DICT.items():
        ORIGIN_DICT[origin] = origin
        for variant in variants:
            ORIGIN_DICT[variant.canon] = origin

    NOT_VL = [origin for origin, variants in VARIATION_DICT.items()
              if not variants]

    outer_if_bodies = dict((origin, Block(nodes=[]))
                           for origin in VARIATION_DICT)

    for origin in VARIATION_DICT:
        variations = list(VARIATION_DICT[origin])
        key_node = CANON_TO_NODE[origin]
        # NOTE(review): the handler below also catches an IndexError raised
        # while the conditions and bodies are generated, not only the one
        # from popping an empty list; kept as is.
        try:
            initial_type = variations.pop()
            sub_if = If(cond=get_variation_cond(initial_type),
                        body=[get_variation_body(initial_type)],
                        elifs=[(get_variation_cond(v),
                                [get_variation_body(v)])
                               for v in variations],
                        el=Block(nodes=[get_variation_body(key_node)]))
            outer_if_bodies[origin].nodes.append(sub_if)
        except IndexError:
            lone_node = get_variation_body(key_node)
            outer_if_bodies[origin].nodes.append(lone_node)

    shape_line = ExprStmt(child=Raw(code="shape=shapes[i]"))

    # One arbitrary entry opens the chain; the rest become else-ifs.
    first_type, first_body = outer_if_bodies.popitem()
    if_statement = If(cond=BinOp(x=Var(name="valtype"), op="==",
                                 y=typeid(first_type)),
                      body=[shape_line, first_body],
                      elifs=[(BinOp(x=Var(name="valtype"), op="==",
                                    y=typeid(other)),
                              [shape_line, outer_if_bodies[other]])
                             for other in outer_if_bodies],
                      el=io_error)
    generated = "" + CPPGEN.visit(if_statement)
    return indent(generated, INDENT)
1685 
# Generation instruction given on the command line -> generator function.
MAIN_DISPATCH = dict(QUERY=main_query, CREATE=main_create)
1688 
def main():
    """Print the code generated for the instruction given in sys.argv[1].

    Raises
    ------
    ValueError
        If no generation instruction was given on the command line.
    KeyError
        If the instruction is not a key of MAIN_DISPATCH.
    """
    try:
        gen_instruction = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; anything else
        # (e.g. KeyboardInterrupt) must not be masked.
        raise ValueError("No generation instruction provided")
    function = MAIN_DISPATCH[gen_instruction]
    print(function())


if __name__ == '__main__':
    main()
def get_decl(t, depth=0, prefix="")
def visit_var(self, node)
def visit_case(self, node)
def flatten(canon)
def visit_type(self, node)
def visit_funccall(self, node)
def resolve_unicode(item)
def list_body(t, depth=0, prefix="", base_offset="buf+offset")
def __init__(self, tree=None)
def vector_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit_block(self, node)
def hdf5_array_create(item_variable, rank=1, dims="&shape0")
def visit_if(self, node)
def set_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
def hdf5_create_compound(sizes)
def set_body(t, depth=0, prefix="", base_offset="buf+offset")
def get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j')
def case_template(t, read_x)
def vl_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit_nothing(self, node)
def reinterpret_cast_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit_assign(self, node)
def __init__(self, tree=None, indent=' ')
def get_dim_shape(canon, start=0, depth=0)
def vl_string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
def map_body(t, depth=0, prefix="", base_offset="buf+offset")
def VL_ADD_BLOCK(t, item_var)
def visit_leftunaryop(self, node)
def convert_canonical(raw_list)
def vec_string_body(t, depth=0, prefix="", base_offset="buf+offset")
def uuid_body(t, depth=0, prefix="", base_offset="buf+offset")
def set_string_body(t, depth=0, prefix="", base_offset="buf+offset")
def string_setup(depth=0, prefix="")
def indent(text, prefix, predicate=None)
def primitive_setup(t, depth=0, prefix="")
def visit_exprstmt(self, node)
def visit_rightunaryop(self, node)
def pair_body(t, depth=0, prefix="", base_offset="buf+offset")
def hdf5_insert(container_type, compound_var, types_sizes_dict)
def list_dependencies(canon)
def __init__(self, tree=None, indent=' ')
def visit_for(self, node)
def print_statement(t, identifier)
def get_variation_cond(t)
def get_variation_body(t)
def list_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
def get_variable(name, depth=0, prefix="")
def get_prefix(base_prefix, parent_type, child_index)
def visit_declassign(self, node)
def visit_decl(self, node)
def get_item_size(t, shape_array=None, vl_flag=False, depth=0)
def __init__(self, kwargs)
def visit_raw(self, node)
def vl_string_setup(depth=0, prefix="")
def get_item_type(t, shape_array=None, vl_flag=False, prefix="", depth=0)
def string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
def visit_binop(self, node)
def get_body(t, depth=0, prefix="", base_offset="buf+offset")
def visit(self, node=None)
def vector_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")