2"""This module generates HDF5 backend code found in src/hdf5_back.cc
4There are 8 distinct code generation options, one of which must be passed
5as an argument to this module. They are CREATE, QUERY, VL_DATASET,
6FILL_BUF, WRITE, VAL_TO_BUF_H, VAL_TO_BUF, and BUF_TO_VAL. Each of these
7generates a different section of Hdf5 backend code. All are invoked by
8src/CMakeLists.txt prior to C++ compilation. However, for debugging purposes,
9each section can be printed individually by passing that section's identifier
10as a command line argument. The entry point for each of these generation
11routines is the function main_XXXX, where XXXX is a generation option.
15To generate the code found in src/hdf5_back.cc::Query, use
17 $ python3 hdf5_back_gen.py QUERY
23from pprint
import pformat
24from itertools
import chain
25from collections
import OrderedDict
# Registries filled in by the type-system setup before any generation runs:
# VARIATION_DICT maps a canonical type to its list of VL/FL variations,
# ORIGIN_DICT maps each variation back to its origin type (see their uses
# in the CREATE/VL_DATASET generators below).
VARIATION_DICT = OrderedDict()
ORIGIN_DICT = OrderedDict()


def is_primitive(t):
    """Return True if type node *t* is a primitive.

    A primitive's canonical form is a plain string (e.g. "INT"), whereas
    container types carry a tuple of component canons.
    """
    # PEP 8 (E731): a named lambda was replaced with a def for clarity
    # and better tracebacks; behavior is unchanged.
    return isinstance(t.canon, str)
48 for field, value
in kwargs.items():
49 if field
not in self.
fields:
50 print(field,
" is not a valid field")
52 setattr(self, field, value)
56 setattr(self, field,
None)
68 fields = (
"cpp",
"db",
"canon")
71 fields = (
"type",
"name")
83 fields = (
"target",
"value")
86 fields = (
"type",
"target",
"value")
89 fields = (
"cond",
"body")
92 fields = (
"cond",
"body",
"elifs",
"el")
95 fields = (
"adecl",
"cond",
"incr",
"body")
98 fields = (
"x",
"op",
"y")
101 fields = (
"op",
"name")
104 fields = (
"name",
"op")
108 fields = (
"name",
"args",
"targs")
111 fields = (
"type",
"name",
"args",
"targs",
"body",
"tspecial")
121_lowername =
lambda cls: cls.__name__.lower()
124 """Super-class for all classes that should walk over a tree of nodes.
125 This implements the visit() method.
132 """Walks over a node. If no node is provided, the tree is used."""
136 raise RuntimeError(
'no node or tree given!')
137 for clsname
in map(_lowername, type.mro(node.__class__)):
138 meth = getattr(self,
'visit_' + clsname,
None)
143 msg =
'could not find valid visitor method for {0} on {1}'
144 nodename = node.__class__.__name__
145 selfname = self.__class__.__name__
146 msg = msg.format(nodename, selfname)
152 raise AttributeError(msg)
156 """Formats a tree of nodes into a pretty string"""
159 super(PrettyFormatter, self).
__init__(tree=tree)
164 s = node.__class__.__name__ +
'('
165 if len(node.fields) == 0:
170 for field
in node.fields:
171 a = getattr(node, field)
172 t.append(self.
visit(a)
if isinstance(a, Node)
else pformat(a))
173 t = [
'{0}={1}'.format(n, x)
for n, x
in zip(node.fields, t)]
180 def __init__(self, tree=None, indent=' ', debug=False):
181 super(CppGen, self).
__init__(tree=tree)
193 s = self.
visit(node.type)
195 s += self.
visit(node.name)
199 s = self.
visit(node.child)
202 s =
"std::cout << \"HDF5_DEBUG: " + s.replace(
'"',
'\\"') +
"\" << std::endl;\n" + s
207 s = self.
visit(node.target)
209 s += self.
visit(node.value)
213 s = self.
visit(node.type)
215 s += self.
visit(node.target)
217 s += self.
visit(node.value)
221 s = self.
visit(node.x)
225 s += self.
visit(node.y)
230 s += self.
visit(node.name)
234 s = self.
visit(node.name)
244 s += self.
visit(node.cond)
253 s += self.
visit(node.cond)
260 for cond, body
in node.elifs:
262 s += self.
visit(cond)
270 if node.el
is not None:
278 if node.adecl
is not None:
279 s += self.
visit(node.adecl) +
";"
282 s += self.
visit(node.cond)
284 s += self.
visit(node.incr)
292 s = self.
visit(node.name)
293 if node.targs
is not None:
295 for i
in range(len(node.targs)):
296 s += self.
visit(node.targs[i])
297 if i < (len(node.targs)-1):
301 for i
in range(len(node.args)):
302 s += self.
visit(node.args[i])
303 if i < (len(node.args)-1):
312 elif node.targs
is not None:
314 for i
in range(len(node.targs)):
316 if i < (len(node.targs)-1):
321 s += self.
visit(node.type)
323 f =
FuncCall(name=node.name, args=node.args, targs=node.targs)
325 b =
Block(nodes=node.body)
341 """Translate unicode types into string types, if necessary.
343 This function exists to support Python 2.7.
347 item : int or str or list
348 The list of items, or item to potentially encode.
353 The same type as was passed to the function, encoded if necessary
355 if isinstance(item, str):
357 elif isinstance(item, tuple):
359 elif isinstance(item, list):
363 return item.encode(
'utf-8')
369 """Converts JSON list of lists to tuple of tuples.
373 raw_list : list or str
374 List to be converted, or str
379 Converted list, or str
381 if isinstance(raw_list, str):
386 """Return a list of a type's dependencies, each in canonical form.
391 the canonical form of the type
396 list of dependencies or str if base type is primitive
399 >>> list_dep("('PAIR', 'INT', 'VL_STRING')")
400 [('PAIR', 'INT', 'VL_STRING'), 'INT', 'VL_STRING']
402 if isinstance(canon, str):
405 dependency_list = [u
for u
in canon[1:]]
406 return [canon] + dependency_list
409 """Return a C++ variable, appropriately formatted for depth.
415 depth : int, optional
416 Depth of variable in relation to depth 0 type.
417 prefix : str, optional
418 New prefix to add, based on direct parent type.
426 return name + str(depth) + prefix
429 """Return the prefix of a C++ variable, appropriately formatted for depth.
434 Prefix of direct parent type.
438 Index relative to direct parent.
445 return base_prefix + template_args[parent_type.canon[0]][child_index]
448 """Represents C++ case statement.
455 Nodes of case statement body.
460 Complete case statement block.
462 if isinstance(read_x, Block):
467 node =
Case(cond=
Var(name=t.db), body=body)
471 """HDF5 Query: Represents necessary setup steps for C++ primitives."""
476 """HDF5 Query: Represents necessary setup steps for C++ String."""
477 nullpos =
"nullpos" + str(depth) + prefix
484 """HDF5 Query: Represents necessary setup steps for C++ VL_String."""
490template_args = {
"MAP": (
"key",
"val"),
494 "PAIR": (
"first",
"second")}
# Container kinds whose runtime length comes from the shape array; during
# setup these receive a "fieldlen" variable read via H5Tget_array_dims2.
variable_length_types = ["MAP", "LIST", "SET", "VECTOR"]
498def get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j'):
499 """HDF5 Query: Get nodes representing C++ setup.
501 Primitive setups are called directly, while template types are handled
507 C++ type, canonical form.
508 depth : int, optional
509 Depth relative to initial, depth 0 type.
510 prefix : str, optional
511 Current prefix, determined by parent type.
513 hid_t type used to access HDF5 methods
514 child_index : str or int
515 Index into parent type, None if only child
520 Nodes required for type t setup.
526 if not child_index
is None:
527 field_type_var =
get_variable(
"fieldtype", depth=depth, prefix=prefix)
529 type=
Type(cpp=
"hid_t"),
530 target=
Var(name=field_type_var),
532 name=
Raw(code=
"H5Tget_member_type"),
533 args=[
Raw(code=HDF5_type),
534 Raw(code=str(child_index))])))
535 HDF5_type = field_type_var
537 total_size_var =
get_variable(
"total_size", depth=depth, prefix=prefix)
539 target=
Var(name=total_size_var),
541 name=
Raw(code=
"H5Tget_size"),
542 args=[
Raw(code=HDF5_type)])))
544 if t.canon ==
"STRING":
545 setup_nodes.append(
string_setup(depth=depth, prefix=prefix))
546 elif t.canon ==
"VL_STRING":
550 if not child_index
is None:
551 setup_nodes.append(field_type)
552 TEARDOWN_STACK.append(field_type_var)
553 setup_nodes.append(total_size)
554 node =
Block(nodes=setup_nodes)
558 target=
Var(name=total_size_var),
559 value=
Raw(code=
"CYCLUS_SHA1_SIZE"))))
560 return Block(nodes=setup_nodes)
562 multi_items = (len(t.canon[1:]) > 1)
564 children = len(t.canon) - 1
566 if not child_index
is None:
567 setup_nodes.append(field_type)
568 TEARDOWN_STACK.append(field_type_var)
570 setup_nodes.append(total_size)
572 if t.canon[0]
in variable_length_types:
573 fieldlen_var =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
575 type=
Type(cpp=
"hsize_t"),
576 name=
Var(name=fieldlen_var))),
578 name=
Raw(code=
"H5Tget_array_dims2"),
579 args=[
Raw(code=HDF5_type),
580 Raw(code=
"&"+fieldlen_var)]))])
581 setup_nodes.append(fieldlen)
585 type=
Type(cpp=
"hid_t"),
586 target=
Var(name=item_type_var),
588 name=
Raw(code=
"H5Tget_super"),
589 args=[
Raw(code=HDF5_type)])))
590 setup_nodes.append(item_type)
591 TEARDOWN_STACK.append(item_type_var)
592 HDF5_type = item_type_var
595 CANON_TO_NODE[new_type],
600 for new_type, part, index
in zip(
602 template_args[t.canon[0]],
603 [i
for i
in range(children)])]))
606 CANON_TO_NODE[new_type],
611 for new_type, part
in zip(
613 template_args[t.canon[0]])]))
616 CANON_TO_NODE[new_type],
621 for new_type, part, index
in zip(
623 template_args[t.canon[0]],
624 [i
for i
in range(children)])]))
625 node =
Block(nodes=setup_nodes)
629 """HDF5 Query: Get node representing C++ type declaration.
631 Declarations occur directly before bodies, created without recursion.
636 C++ type, canonical form.
637 depth : int, optional
638 Depth relative to initial, depth 0 type.
639 prefix : str, optional
640 Prefix determined by parent type.
645 Declaration statement as a node.
647 variable =
get_variable(
"x", depth=depth, prefix=prefix)
652 """HDF5 Query: Represents a body using the reinterpret_cast method.
654 This includes int, double, float, etc.
661 targs=[
Raw(code=t.cpp+
"*")],
662 args=[
Raw(code=base_offset)])))])
665def string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None):
666 """HDF5 Query: Represents body for the C++ String primitive."""
668 variable =
get_variable(
"x", depth=depth, prefix=prefix)
670 nullpos =
get_variable(
"nullpos", depth=depth, prefix=prefix)
672 total_size =
get_variable(
"total_size", depth=depth, prefix=prefix)
677 args=[
Raw(code=base_offset),
678 Raw(code=total_size)]))),
683 args=[
Raw(code=
"'\\0'")])))),
686 y=
Raw(code=
"npos"))),
689 args=[
Raw(code=nullpos)])))])])
694 """HDF5 Query: Represents the body for the VL_String primitive."""
697 variable =
get_variable(
"x", depth=depth, prefix=prefix)
702 args=[
Raw(code=base_offset)],
703 targs=[
Raw(code=t.cpp),
Raw(code=t.db)])))])
706def uuid_body(t, depth=0, prefix="", base_offset="buf+offset"):
707 """HDF5 Query: Represents the body for the boost::uuid primitive."""
709 total_size =
get_variable(
"total_size", depth=depth, prefix=prefix)
713 args=[
Raw(code=
"&"+x),
714 Raw(code=base_offset),
715 Raw(code=total_size)]))])
718def vl_body(t, depth=0, prefix="", base_offset="buf+offset"):
719 """HDF5 Query: Represents the body for all C++ VL types."""
723 args=[
Raw(code=base_offset)],
724 targs=[
Raw(code=t.cpp),
728def map_body(t, depth=0, prefix="", base_offset="buf+offset"):
729 """HDF5 Query: Represents the body for C++ map type."""
732 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
734 key = CANON_TO_NODE[t.canon[1]]
735 value = CANON_TO_NODE[t.canon[2]]
737 key_prefix = prefix + template_args[t.canon[0]][0]
738 key_name =
get_variable(
"x", depth=depth+1, prefix=key_prefix)
739 value_prefix = prefix + template_args[t.canon[0]][1]
740 value_name =
get_variable(
"x", depth=depth+1, prefix=value_prefix)
742 key_size =
get_variable(
"total_size", depth=depth+1, prefix=key_prefix)
743 value_size =
get_variable(
"total_size", depth=depth+1, prefix=value_prefix)
745 item_size =
"(" + key_size +
"+" + value_size +
")"
747 key_offset = base_offset +
"+" + item_size +
"*" + k
748 value_offset = key_offset +
"+" + key_size
753 value=
Raw(code=
"0")),
754 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
757 get_body(key, depth=depth+1, prefix=key_prefix,
758 base_offset=key_offset),
759 get_body(value, depth=depth+1, prefix=value_prefix,
760 base_offset=value_offset),
762 value=
Raw(code=value_name)))])])
765def pair_body(t, depth=0, prefix="", base_offset="buf+offset"):
766 """HDF5 Query: Represents body for C++ pair type."""
769 item1 = CANON_TO_NODE[t.canon[1]]
770 item2 = CANON_TO_NODE[t.canon[2]]
772 item1_prefix = prefix + template_args[t.canon[0]][0]
773 item2_prefix = prefix + template_args[t.canon[0]][1]
774 item1_name =
get_variable(
"x", depth=depth+1, prefix=item1_prefix)
775 item2_name =
get_variable(
"x", depth=depth+1, prefix=item2_prefix)
777 item1_size =
get_variable(
"total_size", depth=depth+1, prefix=item1_prefix)
778 item2_size =
get_variable(
"total_size", depth=depth+1, prefix=item2_prefix)
780 item2_offset = base_offset +
"+" + item1_size
782 node =
Block(nodes=[
get_body(item1, depth=depth+1, prefix=item1_prefix,
783 base_offset=base_offset),
784 get_body(item2, depth=depth+1, prefix=item2_prefix,
785 base_offset=item2_offset),
789 args=[
Raw(code=item1_name),
790 Raw(code=item2_name)])))])
794 """HDF5 Query: Represents body of C++ Vector<primitive> types."""
797 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
798 total_size =
get_variable(
"total_size", depth=depth, prefix=prefix)
800 vector_start =
"&" + x +
"[0]"
804 name=
Raw(code=t.cpp),
805 args=[
Raw(code=fieldlen)]))),
807 args=[
Raw(code=vector_start),
808 Raw(code=base_offset),
809 Raw(code=total_size)]))])
813 """HDF5 Query: Represents body of C++ Vector<non-primitive> types."""
816 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
817 index = x +
"[" + k +
"]"
820 child_var =
get_variable(
"x", depth=depth+1, prefix=child_prefix)
822 child_size =
get_variable(
"total_size", depth=depth+1, prefix=child_prefix)
823 child_offset = base_offset +
"+" + child_size +
"*" + k
828 args=[
Raw(code=fieldlen)]))),
831 value=
Raw(code=
"0")),
832 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
835 get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
837 base_offset=child_offset),
839 value=
Raw(code=child_var)))
844 """HDF5 Query: Represents body of C++ Vector<std::string> types."""
847 index = x +
"[" + k +
"]"
848 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
851 child_size =
get_variable(
"total_size", depth=depth+1, prefix=string_prefix)
852 child_offset = base_offset +
"+" + child_size +
"*" + k
857 args=[
Raw(code=fieldlen)]))),
860 value=
Raw(code=
"0")),
861 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
864 string_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
865 prefix=string_prefix, base_offset=child_offset,
871 """HDF5 Query: Represents body of C++ set<primitive> types."""
874 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
876 xraw =
get_variable(
"xraw", depth=depth+1, prefix=child_prefix)
878 xraw_type = CANON_TO_NODE[t.canon[1]].cpp +
"*"
881 type=
Type(cpp=xraw_type),
882 target=
Var(name=xraw),
884 targs=[
Raw(code=xraw_type)],
885 args=[
Raw(code=base_offset)]))),
889 args=[
Raw(code=xraw),
896def set_body(t, depth=0, prefix="", base_offset="buf+offset"):
897 """HDF5 Query: Represents body of C++ set<non-primitive> types."""
901 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
904 child_var =
get_variable(
"x", depth=depth+1, prefix=child_prefix)
906 item_size =
get_variable(
"total_size", depth=depth+1, prefix=child_prefix)
908 child_offset = base_offset +
"+" + item_size +
"*" + k
913 value=
Raw(code=
"0")),
914 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
917 get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
918 prefix=child_prefix, base_offset=child_offset),
920 args=[
Raw(code=child_var)]))])])
924 """HDF5 Query: Represents body of C++ set<std::string> types."""
928 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
932 prefix=string_prefix)
933 string_name =
get_variable(
"x", depth=depth+1, prefix=string_prefix)
935 offset = base_offset +
"+" + string_size +
"*" + k
940 value=
Raw(code=
"0")),
941 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
944 string_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
945 prefix=string_prefix, base_offset=offset),
947 args=[
Raw(code=string_name)]))])])
951 """HDF5 Query: Represents body of C++ list<primitive> types."""
953 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
955 xraw =
get_variable(
"xraw", depth=depth+1, prefix=child_prefix)
957 xraw_type = CANON_TO_NODE[t.canon[1]].cpp +
"*"
960 type=
Type(cpp=xraw_type),
961 target=
Var(name=xraw),
963 targs=[
Raw(code=xraw_type)],
964 args=[
Raw(code=base_offset)]))),
968 args=[
Raw(code=xraw),
975def list_body(t, depth=0, prefix="", base_offset="buf+offset"):
976 """HDF5 Query: Represents body of C++ list<non-primitive> types."""
980 child_variable =
get_variable(
"x", depth=depth+1, prefix=child_prefix)
981 fieldlen =
get_variable(
"fieldlen", depth=depth, prefix=prefix)
982 item_size =
get_variable(
"total_size", depth=depth+1, prefix=child_prefix)
983 offset = base_offset +
"+" + item_size +
"*" + k
988 value=
Raw(code=
"0")),
989 cond=
BinOp(x=
Var(name=k), op=
"<", y=
Var(name=fieldlen)),
992 get_body(CANON_TO_NODE[t.canon[1]], depth=depth+1,
993 prefix=child_prefix, base_offset=offset),
995 args=[
Raw(code=child_variable)]))])])
998BODIES = {
"INT": reinterpret_cast_body,
999 "DOUBLE": reinterpret_cast_body,
1000 "FLOAT": reinterpret_cast_body,
1001 "BOOL": reinterpret_cast_body,
1003 "STRING": string_body,
1004 "VL_STRING": vl_body,
1006 "VECTOR_STRING": vec_string_body,
1009 "LIST_INT": list_primitive_body,
1010 "LIST_DOUBLE": list_primitive_body,
1011 "LIST_FLOAT": list_primitive_body,
1013 "SET_INT": set_primitive_body,
1014 "SET_DOUBLE": set_primitive_body,
1015 "SET_FLOAT": set_primitive_body,
1017 "VECTOR_INT": vector_primitive_body,
1018 "VECTOR_DOUBLE": vector_primitive_body,
1019 "VECTOR_FLOAT": vector_primitive_body,
1020 "VECTOR": vector_body}
1022def get_body(t, depth=0, prefix="", base_offset="buf+offset"):
1023 """HDF5 Query: Get body nodes for a C++ type.
1028 C++ type, canonical form.
1029 depth : int, optional
1030 Depth relative to initial, depth 0 type.
1031 prefix : str, optional
1032 Current prefix, determined by parent type.
1037 Body nodes required for the type.
1040 block.append(
get_decl(t, depth=depth, prefix=prefix))
1043 block.append(BODIES[t.db](t, depth=depth, prefix=prefix,
1044 base_offset=base_offset))
1046 block.append(BODIES[t.db](t, depth=depth, prefix=prefix,
1047 base_offset=base_offset))
1048 elif DB_TO_VL[t.db]:
1049 block.append(
vl_body(t, depth=depth, prefix=prefix,
1050 base_offset=base_offset))
1051 elif t.db
in BODIES:
1052 block.append(BODIES[t.db](t, depth=depth, prefix=prefix,
1053 base_offset=base_offset))
1054 elif t.canon[0]
in BODIES:
1055 block.append(BODIES[t.canon[0]](t, depth=depth, prefix=prefix,
1056 base_offset=base_offset))
1058 raise ValueError(
"No generation specified for type " + t.db)
1059 return Block(nodes=block)
1067 """Represents the generic close to an hdf5 type code block."""
1070 tree =
Block(nodes=[
1073 targs=[
Raw(code=t.cpp)],
1074 args=[
Raw(code=
"&"+x),
1075 Raw(code=
"&(field_conds[qr.fields[j]])")]))),
1076 If(cond=
Var(name=
"is_row_selected"),
1078 value=
Var(name=x)))])])
1080 for i
in range(len(TEARDOWN_STACK)):
1081 var_name = TEARDOWN_STACK.pop()
1083 args=[
Raw(code=var_name)]))
1084 tree.nodes.append(teardown)
1091 """This function copied from textwrap library version 3.3.
1093 Adds 'prefix' to the beginning of selected lines in 'text'.
1094 If 'predicate' is provided, 'prefix' will only be added to the lines
1095 where 'predicate(line)' is True. If 'predicate' is not provided,
1096 it will default to adding 'prefix' to all non-empty lines that do not
1097 consist solely of whitespace characters.
1099 if predicate
is None:
1100 def predicate(line):
1103 def prefixed_lines():
1104 for line
in text.splitlines(
True):
1105 yield (prefix + line
if predicate(line)
else line)
1106 return ''.join(prefixed_lines())
1109 node = CANON_TO_NODE[t]
1110 return FuncCall(name=
Raw(code=
"typeid"), args=[
Raw(code=node.cpp)])
1113 if t.db !=
"BLOB" and DB_TO_VL[t.db]:
1120 for i
in t.canon[1:]:
1121 ret = ret
and no_vl(CANON_TO_NODE[i])
1127 if isinstance(canon, str):
1138 tshape.append(jshape)
1142 if isinstance(canon, str):
1144 result = list(canon)
1147 while i < len(result):
1148 if isinstance(result[i], str):
1151 temp = result[i][1:]
1153 for j
in range(0, len(temp)):
1154 result.insert(i+j, temp[j])
1155 return tuple(result)
1158 """HDF5 Create: Generate C++ if-statement condition for a given type.
1160 These if-statements are always a string of boolean expressions of the form
1161 'shape[n]<1' or 'shape[n]>=1', where n is an index into the C++ shape array.
1162 A shape index less than one (<1) denotes a variable length type, whereas an
1163 index greater than one (>=1) denotes fixed length type. These boolean
1164 expressions are joined by '&&' operators. For instance, a type of
1165 VL_MAP_VL_STRING_VL_STRING would receive the condition
1166 'shape[0]<1 && shape[1]<1 && shape[2]<1'.
1171 C++ type for the boolean condition.
1175 current_bool : BinOp
1176 Node representing the boolean condition.
1179 vl_potential_count = 0
1184 flat_shape = zip(flat_canon, [x
for x
in range(shape_len)])
1186 for sub_type, index
in flat_shape:
1187 node = CANON_TO_NODE[sub_type]
1189 if DB_TO_VL[node.db]:
1191 vl_potential_count += 1
1192 op_list.append(
BinOp(x=
Raw(code=
"shape["+str(index)+
"]"),
1193 op=
"<", y=
Raw(code=
"1")))
1196 orig_type = ORIGIN_DICT[sub_type]
1198 if VARIATION_DICT[orig_type]:
1199 vl_potential_count += 1
1200 op_list.append(
BinOp(x=
Raw(code=
"shape["+str(index)+
"]"),
1201 op=
">=", y=
Raw(code=
"1")))
1203 if orig_type[0]
in variable_length_types:
1204 vl_potential_count += 1
1205 op_list.append(
BinOp(x=
Raw(code=
"shape["+str(index)+
"]"),
1206 op=
">=", y=
Raw(code=
"1")))
1207 current_bool = op_list[0]
1208 for i
in range(1,len(op_list)):
1209 current_bool =
BinOp(x=current_bool, op=
"&&", y=op_list[i])
1211 if vl_count == vl_potential_count:
1212 current_bool =
BinOp(x=
Raw(code=
"shape.empty()"), op=
"||",
1218 args=[
Raw(code=t.db)]),
1222 x=
Raw(code=
"vldts_["+t.db+
"]"),
1224 y=
Raw(code=
"H5Tvlen_create("+item_var+
")"))),
1226 name=
Raw(code=
"opened_types_.insert"),
1227 args=[
Raw(code=
"vldts_["+t.db+
"]")]))])
1231 """Generate C++ print statement for debugging generated code."""
1232 msg_string = t.db +
": got here: " + str(identifier)
1233 return ExprStmt(child=
Raw(code=
"std::cerr<<\"" + msg_string
1234 +
"\" << std::endl"))
1237 """HDF5 Create: Generate C++ if-statement body for a given type.
1239 Called in coordination with get_variation_cond. For a given C++ type, this
1240 function returns the necessary C++ statements to create the HDF5 version
1246 C++ type for which to create an if-statement body.
1251 Node containing necessary C++ statements for HDF5 creation.
1253 body =
Block(nodes=[])
1256 if t.db
in RAW_TYPES:
1257 return RAW_TYPES[t.db]
1259 body.nodes.append(
ExprStmt(child=
Raw(code=
"dbtypes[i]="+ t.db)))
1262 body.nodes.append(item_nodes)
1263 type_var = opened_types[-1]
if opened_types != []
else get_variable(
1268 is_vl =
True if DB_TO_VL[t.db]
else False
1272 y=size_expression)))
1276 y=
Raw(code=
"sha1_type_"))))
1280 y=
Raw(code=type_var))))
1281 for opened
in opened_types[:-1]:
1283 name=
Raw(code=
"opened_types_.insert"),
1284 args=[
Raw(code=opened)])))
1287 name=
Raw(code=
"opened_types_.insert"),
1288 args=[
Raw(code=
"field_types[i]")])))
# C++ expression yielding the HDF5 type handle for each primitive DB type.
# STRING is fixed-length, so its entry is a format template that is filled
# in with the column's shape-derived size at generation time.
HDF5_PRIMITIVES = {
    "INT": "H5T_NATIVE_INT",
    "DOUBLE": "H5T_NATIVE_DOUBLE",
    "FLOAT": "H5T_NATIVE_FLOAT",
    "BOOL": "H5T_NATIVE_CHAR",
    "STRING": "CreateFLStrType({size})",
    "BLOB": "sha1_type_",
    "UUID": "uuid_type_",
}
# C++ expression for the on-disk byte size of each primitive DB type.
# VL_STRING and BLOB store only a SHA-1 digest keying into a VL dataset,
# hence CYCLUS_SHA1_SIZE rather than a sizeof().
PRIMITIVE_SIZES = {
    "INT": "sizeof(int)",
    "DOUBLE": "sizeof(double)",
    "FLOAT": "sizeof(float)",
    "BOOL": "sizeof(char)",
    "VL_STRING": "CYCLUS_SHA1_SIZE",
    "BLOB": "CYCLUS_SHA1_SIZE",
    "UUID": "CYCLUS_UUID_SIZE",
}
1307VL_TO_FL_CONTAINERS = {
"VL_VECTOR":
"VECTOR",
1313 """HDF5 Create: Build specified HDF5 type, recursively if necessary.
1315 HDF5 types are Primitive, Compound, or Array. We handle each of these cases
1316 here differently. Primitives are immediately returned by querying the
1317 HDF5_PRIMITIVES dictionary. Compound types are made up of multiple
1318 Primitive or Compound types, so each of these child types must be declared
1319 and created before the parent type can be created. This is accomplished via
1320 recursion over every child type in the type's canon. It should be noted that
1321 Compound types depend heavily on the size of those types they contain,
1322 and this function relies on get_item_size for that information. Finally,
1323 Arrays can contain one Primitive or Compound type, and are created by
1324 specifying this child type, Array dimensions, and the Array length.
1329 Type node representing C++ type
1330 shape_array : list, optional
1331 Dimensioned list of current type shape
1332 prefix : str, optional
1333 Used to name C++ variables throughout multiple levels of recursion
1334 depth : int, optional
1335 Recursive depth counter
1340 Cumulative collection of nodes necessary for specified item type
1342 Cumulative collection of opened HDF5 types which must eventually be
1347 if shape_array ==
None:
1350 dim_shape = shape_array
1351 node =
Block(nodes=[])
1353 type_var =
get_variable(
"item_type", prefix=prefix, depth=depth)
1355 name=
Var(name=type_var))))
1357 if isinstance(t.canon, str):
1358 if DB_TO_VL[t.db]
or (t.canon ==
"STRING" and vl_flag):
1360 target=
Var(name=type_var),
1361 value=
Raw(code=
"sha1_type_"))))
1362 return node, opened_stack
1364 primitive_type =
Raw(code=HDF5_PRIMITIVES[t.db].format(size=
"shape["
1368 value=primitive_type)))
1369 return node, opened_stack
1372 container_type = t.canon[0]
1373 canon_shape = list(zip(t.canon, dim_shape))
1377 container_type = VL_TO_FL_CONTAINERS[t.canon[0]]
1380 if t.canon[0]
in variable_length_types:
1381 shape_var =
get_variable(
"shape0", prefix=
"", depth=depth+1)
1384 type=
Type(cpp=
"hsize_t"),
1385 target=
Var(name=shape_var),
1386 value=
Raw(code=
"shape["
1387 +str(dim_shape[0])+
"]"))))
1389 if len(canon_shape[1:]) == 1:
1391 item_canon, item_shape = canon_shape[1]
1393 child_array = (item_shape
if isinstance(item_shape, list)
1395 new_prefix = template_args[container_type][0]
1396 child_node, child_opened =
get_item_type(CANON_TO_NODE[item_canon],
1397 shape_array=child_array,
1401 pre_opened_len = len(opened_stack)
1402 node.nodes.append(child_node)
1403 opened_stack.extend(child_opened)
1404 if pre_opened_len < len(opened_stack):
1405 item_var = opened_stack[-1]
1407 item_var =
get_variable(
"item_type", prefix=new_prefix,
1412 child_dict = OrderedDict()
1414 for i
in range(1, len(canon_shape)):
1415 item_canon, item_shape = canon_shape[i]
1416 item_node = CANON_TO_NODE[item_canon]
1417 pre_opened_len = len(opened_stack)
1418 child_array = (item_shape
if isinstance(item_shape, list)
1420 new_prefix = template_args[container_type][i-1]
1422 shape_array=child_array,
1426 node.nodes.append(child_node)
1427 opened_stack.extend(child_opened)
1432 if (DB_TO_VL[t.db]
and item_canon[0]
in variable_length_types):
1436 elif DB_TO_VL[item_node.db]:
1441 if len(opened_stack) == pre_opened_len:
1448 child_item_var = opened_stack[-1]
1457 item_var =
get_variable(
"item_type", prefix=prefix+
'compound', depth=depth+1)
1459 name=
Raw(code=item_var))))
1464 opened_stack.append(item_var)
1466 node.nodes.append(
hdf5_insert(container_type, item_var, child_dict))
1468 if is_vl
and t.canon
not in NOT_VL:
1470 value=
Raw(code=
'sha1_type_'))))
1472 node.nodes.append(
VL_ADD_BLOCK(ORIGIN_TO_VL[ORIGIN_DICT[t.canon]], item_var))
1475 opened_stack.append(type_var)
1477 elif container_type
in variable_length_types
and not DB_TO_VL[t.db]:
1482 dims=
"&"+shape_var)))
1483 opened_stack.append(type_var)
1484 node.nodes.append(array_node)
1486 return node, opened_stack
1489 """Resolves item size recursively.
1491 We can dig down into a type until we reach eventual primitives, and then
1492 multiply the known sizes of those primitives by the lengths of their
1493 containers. Container length is defined in the C++ shape array.
1498 The type whose size is in question
1499 shape_array : list, optional
1500 Dimensioned list of shape array indicies, same shape as t.canon
1501 depth : int, optional
1502 Recursive depth counter
1507 String of C++ expression representing t's size.
1509 if shape_array ==
None:
1512 if t.db
in PRIMITIVE_SIZES.keys():
1513 return PRIMITIVE_SIZES[t.db]
1516 return "shape[" + str(shape_array[0]) +
"]"
1518 return "CYCLUS_SHA1_SIZE"
1521 if DB_TO_VL[t.db]
or (vl_flag
and t.canon
not in NOT_VL):
1522 size +=
"CYCLUS_SHA1_SIZE"
1525 if len(t.canon[1:]) > 1:
1527 for child_index
in range(1, len(t.canon)):
1528 child_array = shape_array[child_index]
1529 if not isinstance(child_array, list):
1530 child_array = [child_array]
1532 CANON_TO_NODE[t.canon[child_index]],
1536 size +=
"+".join(children)
1538 child_array = shape_array[1]
1539 if not isinstance(child_array, list):
1540 child_array = [child_array]
1541 size +=
get_item_size(CANON_TO_NODE[t.canon[1]], child_array,
1542 vl_flag=vl_flag, depth=depth+1)
1544 if t.canon[0]
in variable_length_types:
1545 size +=
"*" +
"shape[" + str(shape_array[0]) +
"]"
1550 """Node representation of the C++ H5Tarray_create2 method.
1555 Variable name of HDF5 array item.
1556 rank : int, optional
1557 Number of HDF5 array dimensions.
1558 dims : str, optional
1559 Variable (by reference) of shape array belonging to HDF5 array
1564 Node of H5Tarray_create2 function call.
1566 node =
FuncCall(name=
Var(name=
"H5Tarray_create2"),
1567 args=[
Raw(code=item_variable),
Raw(code=str(rank)),
1572 """Node representation of the C++ HDF5 compound type creation function.
1577 List of type sizes, all must be str type.
1582 H5Tcreate function call node.
1584 node =
FuncCall(name=
Var(name=
"H5Tcreate"), args=[
Raw(code=
"H5T_COMPOUND"),
1585 Raw(code=
"+".join(sizes))])
1589 """Node representation of the C++ H5Tinsert function.
1591 This function is used to identify partitions within an already established
1592 HDF5 Compound type. That is, we specify which inner types are located at
1593 what memory location within the Compound type.
1597 container_type : str
1598 Should be a key in the template_args dict
1600 C++ variable to which the function should refer
1601 types_sizes_dict : dict
1602 Dictionary of C++ type variables mapped to their size in memory
1607 Cumulative nodes for H5Tinsert function
1609 node =
Block(nodes=[])
1611 keys = list(types_sizes_dict.keys())
1612 for i
in range(len(types_sizes_dict)):
1614 type_size = types_sizes_dict[type_var]
1615 descriptor =
"\"" + template_args[container_type][i] +
"\""
1616 func =
FuncCall(name=
Var(name=
"H5Tinsert"), args=[])
1617 func.args.append(
Raw(code=compound_var))
1618 func.args.append(
Raw(code=descriptor))
1619 func.args.append(
Raw(code=buf))
1620 buf +=
"+" + type_size
1621 func.args.append(
Raw(code=type_var))
1622 node.nodes.append(
ExprStmt(child=func))
1626 """HDF5 Query: Generate Query case statement code."""
1627 CPPGEN =
CppGen(debug=
False)
1629 for type
in CANON_TYPES:
1630 type_node = CANON_TO_NODE[type]
1634 read_x =
Block(nodes=[setup, body, teardown])
1636 output =
indent(output, INDENT * 5)
1639io_error =
Raw(code=(
"throw IOError(\"the type for column \'\"+"
1640 "std::string(field_names[i])+\"\' is not yet supported "
1643raw_string =
Raw(code=(
"dbtypes[i]=STRING;\n"
1644 "field_types[i]=H5Tcopy(H5T_C_S1);\n"
1645 "H5Tset_size(field_types[i], shape[0]);\n"
1646 "H5Tset_strpad(field_types[i], H5T_STR_NULLPAD);\n"
1647 "opened_types_.insert(field_types[i]);\n"
1648 "dst_sizes[i]=sizeof(char)*shape[0];\n"))
1650raw_blob =
Raw(code=(
"dbtypes[i]=BLOB;\n"
1651 "field_types[i]=sha1_type_;\n"
1652 "dst_sizes[i]=CYCLUS_SHA1_SIZE;\n"))
1654RAW_TYPES = {
"STRING": raw_string,
1657DEBUG_TYPES = [
"VECTOR_STRING"]
1660 """HDF5 Create: Generate CreateTable if-statements."""
1661 CPPGEN =
CppGen(debug=
False)
1664 outer_if_bodies = OrderedDict()
1665 for n
in VARIATION_DICT.keys():
1666 outer_if_bodies[n] =
Block(nodes=[])
1668 for n
in VARIATION_DICT.keys():
1669 variations = VARIATION_DICT[n][:]
1670 key_node = CANON_TO_NODE[n]
1672 initial_type = variations.pop()
1677 for v
in variations],
1679 outer_if_bodies[n].nodes.append(sub_if)
1682 outer_if_bodies[n].nodes.append(lone_node)
1684 shape_line =
ExprStmt(child=
Raw(code=
"shape=shapes[i]"))
1686 initial_node, initial_body = outer_if_bodies.popitem()
1687 if_statement =
If(cond=
BinOp(x=
Var(name=
"valtype"), op=
"==",
1689 body=[shape_line, initial_body],
1690 elifs=[(
BinOp(x=
Var(name=
"valtype"), op=
"==",
1692 [shape_line, outer_if_bodies[t]])
1693 for t
in outer_if_bodies.keys()],
1695 output += CPPGEN.visit(if_statement)
1696 output =
indent(output, INDENT)
1700 parts = db.split(
"_")
1701 for i
in range(len(parts)):
1702 parts[i] = parts[i].capitalize()
1703 return "".join(parts)
1706 return "\"" + s +
"\""
1709 """HDF5 VL_DATASET: Generate the VLDataset function code."""
1712 origin_types = list(VARIATION_DICT.keys())
1713 for origin
in origin_types:
1714 vals = [v.canon
for v
in VARIATION_DICT[origin]
if DB_TO_VL[v.db]]
1715 origin_node = CANON_TO_NODE[origin]
1718 if DB_TO_VL[origin_node.db]:
1724 target=
Var(name=
"name"),
1727 output += CPPGEN.visit(
case_template(CANON_TO_NODE[v], case_body))
1729 output =
indent(output, INDENT*2)
1733 """HDF5 FILL_BUF: Generates the FillBuf function code."""
1736 for i
in CANON_TYPES:
1737 node = CANON_TO_NODE[i]
1738 write_to_buf =
FuncCall(name=
Var(name=
"WriteToBuf"),
1739 targs=[
Raw(code=node.db)],
1740 args=[
Raw(code=
"buf+offset"),
1741 Raw(code=
"shapes[col]"),
1742 Raw(code=
"a"),
Raw(code=
"sizes[col]")])
1743 case_body =
ExprStmt(child=write_to_buf)
1745 output =
indent(output, INDENT*4)
# HDF5 Write: C++ template for writing a VL_STRING value. Hashes the value,
# then appends the key and inserts the value into the VL datasets only when
# the SHA1 key is not already present. Filled in via str.format with
# var/key/keysds/valsds and a type node bound to 't' ({t.db} uses format's
# attribute access). NOTE(review): the closing "}}" brace lines were elided
# in this listing and have been reconstructed.
vl_write_vl_string = """hasher_.Clear();
hasher_.Update({var});
Digest {key} = hasher_.digest();
hid_t {keysds} = VLDataset({t.db}, true);
hid_t {valsds} = VLDataset({t.db}, false);
if (vlkeys_[{t.db}].count({key}) != 1) {{
  AppendVLKey({keysds}, {t.db}, {key});
  InsertVLVal({valsds}, {t.db}, {key}, {var});
}}
"""
# HDF5 Write: C++ template for writing a BLOB value. Identical to the
# VL_STRING template except the inserted value is the blob's string
# representation, ({var}).str(). NOTE(review): the closing "}}" brace lines
# were elided in this listing and have been reconstructed.
vl_write_blob = """hasher_.Clear();
hasher_.Update({var});
Digest {key} = hasher_.digest();
hid_t {keysds} = VLDataset({t.db}, true);
hid_t {valsds} = VLDataset({t.db}, false);
if (vlkeys_[{t.db}].count({key}) != 1) {{
  AppendVLKey({keysds}, {t.db}, {key});
  InsertVLVal({valsds}, {t.db}, {key}, ({var}).str());
}}
"""
# DB types whose VLWrite code uses a hand-written template instead of the
# generic one built inside vl_write().
VL_SPECIAL_TYPES = {"VL_STRING": vl_write_vl_string,
                    "BLOB": vl_write_blob}
def vl_write(t, variable, depth=0, prefix="", pointer=False):
    """HDF5 Write: Return code previously found in VLWrite.

    Builds the C++ snippet that hashes a variable-length value and, when its
    SHA1 key is not yet recorded, appends the key and inserts the value into
    the VL datasets.

    Parameters
    ----------
    t : Type
        Node representing the C++ type being written.
    variable : str
        Name of the C++ variable holding the value.
    depth : int, optional
        Recursive depth, used for unique variable naming.
    prefix : str, optional
        Variable name prefix, used for uniqueness.
    pointer : bool, optional
        If True, the C++ variable is dereferenced before use.

    Returns
    -------
    Raw
        Node containing the formatted C++ code.

    NOTE(review): several formulaic lines (the `if pointer:` guard, the
    template's closing braces, the `key=` format argument, and the final
    return) were elided in this listing and have been reconstructed from the
    surrounding fragments — confirm against the original file.
    """
    buf_variable = get_variable("buf", depth=depth, prefix=prefix)
    key_variable = get_variable("key", depth=depth, prefix=prefix)
    keysds_variable = get_variable("keysds", depth=depth, prefix=prefix)
    valsds_variable = get_variable("valsds", depth=depth, prefix=prefix)
    if pointer:
        variable = "*" + variable
    if t.db in VL_SPECIAL_TYPES:
        # VL_STRING and BLOB use hand-written templates.
        node_str = VL_SPECIAL_TYPES[t.db]
    else:
        # Generic template: convert the value to an hvl_t buffer first.
        node_str = """hasher_.Clear();
hasher_.Update({var});
Digest {key} = hasher_.digest();
hid_t {keysds} = VLDataset({t.db}, true);
hid_t {valsds} = VLDataset({t.db}, false);
if (vlkeys_[{t.db}].count({key}) != 1) {{
  hvl_t {buf} = VLValToBuf({var});
  AppendVLKey({keysds}, {t.db}, {key});
  InsertVLVal({valsds}, {t.db}, {key}, {buf});
}}
"""
    node = Raw(code=node_str.format(var=variable, no_p_var=variable.strip("*"),
                                    key=key_variable,
                                    keysds=keysds_variable, t=t,
                                    valsds=valsds_variable,
                                    buf=buf_variable))
    return node
1801 """HDF5 Write: Node representation of memcpy function."""
1807 """HDF5 Write: Node representation of memset function."""
1813 """HDF5 Write: Node representation of boost hold_any casting."""
1814 cast =
Block(nodes=[])
1816 if (
is_primitive(t)
and t.db
not in WRITE_BODY_PRIMITIVES
1817 and not DB_TO_VL[t.db]):
1819 name=
Var(name=val))))
1820 cast_string =
"a->castsmallvoid()"
1822 value=
Raw(code=cast_string))))
1824 cast_string =
"a->cast<" + t.cpp +
">()"
1827 target=
Var(name=val),
1828 value=
Raw(code=cast_string))))
1832 """HDF5 Write: Creates setup variables (lengths, sizes) for function body.
1834 This function recursively declares the sizes, lengths and other necessary
1835 variables for the parent and children types. Called by get_write_body.
1848 setup =
Block(nodes=[])
1849 variable =
get_variable(
"item_size", depth=depth, prefix=prefix)
1851 target=
Var(name=variable),
1854 if t.db ==
"STRING":
1855 valuelen =
get_variable(
"valuelen", depth=depth, prefix=prefix)
1857 name=
Var(name=(valuelen)))))
1862 container = t.canon[0]
1864 container = VL_TO_FL_CONTAINERS[container]
1865 elif t.canon[0]
in variable_length_types:
1866 length =
get_variable(
"length", depth=depth, prefix=prefix)
1868 type=
Type(cpp=
"size_t"),
1869 target=
Var(name=length),
1872 +str(shape_array[0])
1874 prefixes = template_args[container]
1877 for c, s, p
in zip(t.canon[1:], shape_array[1:], prefixes):
1878 node = CANON_TO_NODE[c]
1879 if isinstance(s, int):
1883 total_item_size =
get_variable(
"total_item_size", depth=depth,
1888 for i
in range(len(t.canon[1:])):
1889 children.append(
get_variable(
"item_size", depth=depth+1,
1890 prefix=prefix+prefixes[i]))
1891 if len(children) == 1:
1893 type=
Type(cpp=
"size_t"),
1894 target=
Var(name=total_item_size),
1895 value=
Raw(code=children[0]))))
1898 type=
Type(cpp=
"size_t"),
1899 target=
Var(name=total_item_size),
1901 code=
"+".join(children)))))
1903 if depth == 0
and not DB_TO_VL[t.db]:
1904 if container
in variable_length_types:
1905 variable =
get_variable(
"val", depth=depth, prefix=prefix)
1906 init = t.cpp +
"::iterator eraseit=" + variable +
".begin()"
1907 advance =
"std::advance(eraseit, column/" + total_item_size +
")"
1908 erase = variable +
".erase(eraseit," + variable +
".end())"
1909 column_check =
If(cond=
BinOp(x=
Raw(code=total_item_size+
"*"
1910 +variable+
".size()"),
1911 op=
">", y=
Raw(code=
'column')),
1915 setup.nodes.append(column_check)
1921 """HDF5 Write: Specialization of the write_body function for STRING type"""
1922 if variable
is None:
1923 variable =
get_variable(
"val", depth=depth, prefix=prefix)
1924 node =
Block(nodes=[])
1925 size =
"->size()" if pointer
else ".size()"
1926 c_str =
"->c_str()" if pointer
else ".c_str()"
1927 valuelen =
get_variable(
"valuelen", depth=depth, prefix=prefix)
1928 item_size =
get_variable(
"item_size", depth=depth, prefix=prefix)
1931 name=
Raw(code=
"std::min"),
1933 Raw(code=variable+size),
1934 Raw(code=item_size)]))))
1935 node.nodes.append(
memcpy(offset, variable+c_str, valuelen))
1936 node.nodes.append(
memset(offset+
"+"+valuelen,
"0", item_size+
"-"+valuelen))
1941 """HDF5 Write: Specialization of the write_body function for UUID type"""
1942 if variable
is None:
1943 variable =
get_variable(
"val", depth=depth, prefix=prefix)
1944 node =
Block(nodes=[])
1945 size =
get_variable(
"item_size", depth=depth, prefix=prefix)
1947 variable =
"*" + variable
1948 variable =
"&(" + variable +
")"
1949 node.nodes.append(
memcpy(offset, variable, size))
1954 """HDF5 Write: Specialization of the write_body function for primitives"""
1955 if variable
is None:
1956 variable =
get_variable(
"val", depth=depth, prefix=prefix)
1957 node =
Block(nodes=[])
1958 size =
get_variable(
"item_size", depth=depth, prefix=prefix)
1963 variable =
"*" + variable
1964 variable =
"&(" + variable +
")"
1965 node.nodes.append(
memcpy(offset, variable, size))
# Primitive DB types that have specialized WriteToBuf body generators.
WRITE_BODY_PRIMITIVES = {"STRING": write_body_string,
                         "UUID": write_body_uuid}
# C++ element-insertion statement templates, keyed by container kind.
# Each is filled via str.format with 'var' (the container variable) and
# 'child0'/'child1' (element expressions).
CONTAINER_INSERT_STRINGS = {"MAP": "{var}[{child0}] = {child1}",
                            "LIST": "{var}.push_back({child0})",
                            "SET": "{var}.insert({child0})",
                            "VECTOR": "{var}.push_back({child0})",
                            "PAIR": "{var} = std::make_pair({child0},{child1})"}
1978 """HDF5 Write: Determines if type is entirely VL.
1980 A type is entirely VL if the top level type is VL, as well as all children
1981 that have the potential to be VL. This means that VL_VECTOR_INT will return
1982 True here, but VL_VECTOR_STRING will return False.
1991 True if type is entirely VL, else False
1998 for i
in range(len(flat)):
2000 node = CANON_TO_NODE[canon]
2001 if DB_TO_VL[node.db]:
2007 elif node.canon
in NOT_VL:
2014def pad_children(t, variable, fixed_var=None, depth=0, prefix="", called_depth=0):
2015 """HDF5 Write: Pads FL children of VL parent types.
2017 This function is used on top-level VL container types which contain 1 or
2018 more FL child types (i.e. VL_VECTOR_STRING). These children should be
2019 padded to their max length if they do not already meet it. This is done
2026 fixed_var : None or str, optional
2027 depth : int, optional
2028 prefix : str, optional
2029 called_depth : int, optional
2030 Records the origin depth to determine when we're at relative depth=0
2035 Nodes required for padding
2038 container = VL_TO_FL_CONTAINERS[t.canon[0]]
2040 container = t.canon[0]
2041 result =
Block(nodes=[])
2047 if fixed_var ==
None:
2048 fixed_var =
get_variable(
"fixed_val", depth=depth, prefix=prefix)
2050 name=
Raw(code=fixed_var))))
2051 iterator =
get_variable(
"it", depth=depth, prefix=prefix)
2052 pad_count =
get_variable(
"pad_count", depth=depth, prefix=prefix)
2054 type=
Type(cpp=
'unsigned int'),
2055 target=
Var(name=pad_count),
2056 value=
Raw(code=
'0'))))
2057 prefixes = template_args[container]
2058 keywords[
'var'] = fixed_var
2059 num = len(t.canon[1:])
2061 children = [
"(*" + iterator +
")"]
2063 if container
in variable_length_types:
2064 members = [
'->first',
'->second']
2065 children = [
"{}{}".format(a, b)
for a, b
in zip([iterator]*num,
2068 members = [
'.first',
'.second']
2069 children = [
"{}{}".format(a, b)
for a, b
in zip([variable]*num,
2072 for i
in t.canon[1:]:
2073 child_node = CANON_TO_NODE[i]
2074 child_keyword =
"child" + str(count)
2076 prefix=prefix+prefixes[count])
2080 prefix=prefix+prefixes[count])
2081 if child_node.db ==
'STRING':
2082 constructor = (
"std::string(" + children[count] +
",0,"
2087 name=child_variable),
2089 code=constructor))))
2090 keywords[child_keyword] = child_variable
2093 keywords[child_keyword] = children[count]
2096 if DB_TO_VL[child_node.db]:
2099 keywords[child_keyword] = children[count]
2102 body_nodes.append(
pad_children(child_node, children[count],
2103 fixed_var=child_variable,
2107 called_depth=called_depth))
2108 keywords[child_keyword] = child_variable
2110 elif child_node.canon[0]
in variable_length_types:
2112 prefix=prefix+prefixes[count])
2113 child_pad_count =
get_variable(
"pad_count", depth=depth+1,
2114 prefix=prefix+prefixes[count])
2115 body_nodes.append(
pad_children(child_node, children[count],
2116 fixed_var=child_variable,
2118 prefix=prefix+prefixes[count],
2119 called_depth=called_depth))
2121 if child_node.canon[0] ==
'VECTOR':
2122 body_nodes.append(
ExprStmt(child=
Raw(code=child_variable+
".resize("+child_length+
")")))
2123 size =
"(" + child_length +
"-" + child_pad_count +
")" +
"*" + item_size
2124 body_nodes.append(
memset(
"&"+child_variable, str(0), size))
2125 keywords[child_keyword] = child_variable
2131 body_nodes.append(
pad_children(child_node, children[count],
2132 fixed_var=child_variable,
2134 prefix=prefix+prefixes[count],
2135 called_depth=called_depth))
2136 keywords[child_keyword] = child_variable
2139 assignment = CONTAINER_INSERT_STRINGS[container].format(**keywords)
2140 body_nodes.append(
ExprStmt(child=
Raw(code=assignment)))
2141 if container
in variable_length_types:
2142 body_nodes.append(
ExprStmt(child=
Raw(code=
"++" + pad_count)))
2143 if depth > called_depth:
2147 target=
Raw(code=iterator),
2148 value=
Raw(code=variable
2150 result.nodes.append(
For(cond=
BinOp(x=
Var(name=iterator), op=
"!=",
2151 y=
Var(name=variable+
".end()")),
2152 incr=
Raw(code=
"++" + iterator),
2155 result.nodes.extend(body_nodes)
2159 offset="buf", pointer=False):
2160 """HDF5 Write: Generates the body of the WriteToBuf function definition.
2165 Node representing the desired C++ type
2167 Dimensioned list of shape array indicies corresponding to types in
2169 depth : int, optional
2171 prefix : str, optional
2172 Used for recursive variable naming convention
2173 variable : str, optional
2174 Name of the type's C++ variable
2175 offset : str, optional
2176 Location of current memory offset
2177 pointer : bool, optional
2178 Denotes if current variable is a pointer, and whether member access
2179 should be performed via arrow or dot notation
2184 Nodes required for body of the function definition
2186 result =
Block(nodes=[])
2191 variable =
get_variable(
"val", depth=depth, prefix=prefix)
2192 result.nodes.append(
a_cast(t))
2197 result.nodes.append(
vl_write(t, variable, depth=depth, prefix=prefix,
2200 result.nodes.append(
memcpy(offset, key +
".val",
"CYCLUS_SHA1_SIZE"))
2204 if t.db
in WRITE_BODY_PRIMITIVES:
2205 result.nodes.append(WRITE_BODY_PRIMITIVES[t.db](t, depth=depth,
2220 count =
get_variable(
"count", depth=depth, prefix=prefix)
2222 type=
Type(cpp=
"unsigned int"),
2223 target=
Var(name=count),
2224 value=
Raw(code=
"0"))))
2225 iterator =
get_variable(
"it", depth=depth, prefix=prefix)
2226 total_size =
get_variable(
"total_item_size", depth=depth, prefix=prefix)
2229 container = t.canon[0]
2232 container = VL_TO_FL_CONTAINERS[container]
2233 prefixes = template_args[container]
2235 type=
Type(cpp=t.cpp+
"::iterator"),
2236 target=
Raw(code=iterator),
2237 value=
Raw(code=variable
2239 result.nodes.append(
pad_children(t, variable, depth=depth,
2240 prefix=prefix, called_depth=depth))
2241 fixed_val =
get_variable(
"fixed_val", depth=depth, prefix=prefix)
2242 result.nodes.append(
vl_write(t, fixed_val, depth=depth,
2245 result.nodes.append(
memcpy(offset, key +
".val",
"CYCLUS_SHA1_SIZE"))
2248 new_variable = variable
2249 if container
in variable_length_types:
2250 new_variable = iterator
2251 prefixes = template_args[container]
2252 if len(t.canon[1:]) == 1:
2253 child_node = CANON_TO_NODE[t.canon[1]]
2255 prefix=prefix+prefixes[0])
2258 prefix=prefix+prefixes[0],
2259 variable=new_variable,
2260 offset=offset+
"+"+child_size
2265 if container
in variable_length_types:
2266 labels = [
'->first',
'->second']
2268 labels = [
'->first',
'->second']
2270 labels = [
'.first',
'.second']
2271 for c, s, p, l
in zip(t.canon[1:], shape_array[1:], prefixes,
2273 child_node = CANON_TO_NODE[c]
2274 item_label = new_variable+l
2280 variable=item_label,
2281 offset=offset+
"+("+count
2286 partial_size +=
"+" + child_size
2287 if container
in variable_length_types:
2288 labels = [
'->first',
'->second']
2293 target=
Raw(code=new_variable),
2294 value=
Raw(code=variable
2299 result.nodes.append(
For(cond=
BinOp(x=
Var(name=new_variable),
2303 incr=
Raw(code=
"++" + new_variable),
2308 dest = offset +
"+" + total_size +
"*" + count
2309 length = (total_size +
"*" +
"(" + container_length +
"-"
2312 result.nodes.append(
If(cond=
BinOp(
2313 x=
Raw(code=total_size+
"*"
2315 op=
"<", y=
Raw(code=
"column")),
2316 body=[
memset(dest, str(0), length)]))
2318 result.nodes.append(
memset(dest, str(0), length))
2320 result.nodes.extend(child_bodies)
2324 """HDF5 Write: Generate the WriteToBuf templated function definitions."""
2325 CPPGEN =
CppGen(debug=
False)
2327 for i
in CANON_TYPES:
2328 block =
Block(nodes=[])
2329 t = CANON_TO_NODE[i]
2331 name=
Var(name=
"Hdf5Back::WriteToBuf"),
2332 targs=[
Raw(code=t.db)],
2333 args=[
Decl(type=
Type(cpp=
"char*"), name=
Var(name=
"buf")),
2334 Decl(type=
Type(cpp=
"std::vector<int>&"),
2335 name=
Var(name=
"shape")),
2337 cpp=
"const boost::spirit::hold_any*"),
2338 name=
Var(name=
"a")),
2340 name=
Var(name=
"column"))],
2343 block.nodes.append(node)
2344 output += CPPGEN.visit(block)
2348 """HDF5 VAL_TO_BUF and BUF_TO_VAL: Generate setup for both functions.
2350 This setup is to be called one time for each type. It returns nodes for
2351 initial buffer/type declaration, item sizes, and a potentially dimensioned
2352 list describing which child types within the initial container t are VL.
2353 These are denoted by a 1, where fixed-length primitive types are denoted by
2354 a 0. Fixed-length containers (i.e. pairs) are denoted by a nested list of
2361 depth : int, optional
2362 Recursive depth counter, used for variable names.
2363 prefix : str, optional
2364 Current prefix, used for variable name uniqueness.
2365 spec : str or None, optional
2366 Determines whether extra nodes are added for VAL_TO_BUF or BUF_TO_VAL
2373 Potentially dimensioned list cooresponding to child types, with values
2374 of 0 and 1 representing FL and VL types, respectively.
2376 node =
Block(nodes=[])
2379 if spec ==
'TO_BUF':
2381 name=
Var(name=
"buf"))))
2383 target=
Var(name=
"buf.len"),
2384 value=
Raw(code=
"x.size()"))))
2385 elif spec ==
'TO_VAL':
2389 type=
Type(cpp=
'char*'),
2390 target=
Var(name=
'p'),
2394 child_sizes = OrderedDict()
2395 container = t.canon[0]
2396 prefixes = template_args[container]
2397 children = t.canon[1:]
2400 for c, p
in zip(children, prefixes):
2401 child_node = CANON_TO_NODE[c]
2402 variable =
get_variable(
"item_size", depth=depth+1, prefix=prefix+p)
2404 if child_node.db ==
"BLOB":
2405 child_sizes[variable] =
"CYCLUS_SHA1_SIZE"
2407 elif VARIATION_DICT[c]:
2408 child_sizes[variable] =
"CYCLUS_SHA1_SIZE"
2411 child_sizes[variable] = PRIMITIVE_SIZES[child_node.db]
2413 elif child_node.canon[0]
in variable_length_types:
2414 child_sizes[variable] =
"CYCLUS_SHA1_SIZE"
2417 child_sizes[variable] =
get_variable(
"total_item_size", depth+1,
2421 node.nodes.append(new_node)
2422 vl_list.append(new_list)
2424 for k, v
in child_sizes.items():
2427 value=
Raw(code=v))))
2428 total_var =
get_variable(
"total_item_size", depth=depth, prefix=prefix)
2430 type=
Type(cpp=
"size_t"),
2431 target=
Raw(code=total_var),
2433 code=
"+".join(child_sizes.keys())))))
2436 if spec ==
'TO_BUF':
2438 type=
Type(cpp=
"size_t"),
2439 target=
Var(name=
"nbytes"),
2440 value=
Raw(code=total_var
2443 target=
Var(name=
"buf.p"),
2445 code=
"new char[nbytes]"))))
2446 return node, vl_list
2449 offset="reinterpret_cast<char*>(buf.p)
"):
2450 """HDF5 VAL_TO_BUF: Generates the body of the VLValToBuf function.
2452 The VLValToBuf function creates a new VL buffer from an original C++ data
2453 type. All potentially variable length types are passed to VLWrite and a
2454 SHA1 hash is added to the buffer in place of the actual type data.
2455 Primitives and remaining container types are written as-is to the buffer.
2460 Node representing current C++ type
2462 Potentially dimensioned list of 1's and 0's, corresponding to each
2463 child type and whether it is variable length or not, respectively.
2464 depth : int, optional
2465 Current recursive depth, used for naming variables.
2466 prefix : str, optional
2467 Current variable prefix, used to ensure unique variable names.
2468 variable : str, optional
2469 Current container variable name.
2470 offset : str or None, optional
2471 Current offset into data.
2476 Nodes representing the body.
2478 if variable ==
None:
2480 block =
Block(nodes=[])
2481 total_size_var =
get_variable(
"total_item_size", depth=depth, prefix=prefix)
2482 prefixes = template_args[t.canon[0]]
2483 children = t.canon[1:]
2484 loop_block =
Block(nodes=[])
2489 if t.canon[0]
in variable_length_types:
2490 count_var =
get_variable(
"count", depth=depth, prefix=prefix)
2492 cpp=
"unsigned int"),
2493 target=
Var(name=count_var),
2494 value=
Raw(code=
"0"))))
2495 iter_var =
get_variable(
"it", depth=depth, prefix=prefix)
2498 +
"::const_iterator"),
2499 target=
Var(name=iter_var),
2500 value=
Raw(code=variable
2502 new_variable = iter_var
2503 labels = [
'->first',
'->second']
2504 new_offset +=
"+(" + total_size_var +
"*" + count_var +
")"
2506 labels = [
'.first',
'.second']
2507 new_variable = variable
2510 if len(children) == 1:
2511 new_variable =
"(*" + new_variable +
")"
2513 for child, part, vl, label
in zip(children, prefixes, vl_list, labels):
2514 child_node = CANON_TO_NODE[child]
2515 child_var = new_variable + label
2516 item_size =
get_variable(
"item_size", depth=depth+1, prefix=prefix+part)
2519 loop_block.nodes.append(
vl_write(ORIGIN_TO_VL[child], child_var,
2520 depth=depth+1, prefix=prefix+part))
2521 key_var =
get_variable(
"key", depth=depth+1, prefix=prefix+part)
2522 loop_block.nodes.append(
memcpy(new_offset, key_var+
".val",
2526 loop_block.nodes.append(
memcpy(new_offset,
"&("+child_var+
")",
2530 loop_block.nodes.append(
to_buf_body(child_node, vl, depth=depth+1,
2531 prefix=prefix+part, variable=child_var,
2534 new_offset +=
"+" + item_size
2537 if t.canon[0]
in variable_length_types:
2538 block.nodes.append(
For(cond=
BinOp(x=
Var(name=iter_var), op=
"!=",
2539 y=
Var(name=variable+
".end()")),
2540 incr=
Raw(code=
"++"+iter_var),
2545 name=count_var)))]))
2548 block.nodes.append(loop_block)
2550 block.nodes.append(
ExprStmt(child=
Raw(code=
"return buf")))
2554 """HDF5 VAL_TO_BUF: Generates VLValToBuf function."""
2555 CPPGEN =
CppGen(debug=
False)
2557 block =
Block(nodes=[])
2558 for i
in VARIATION_DICT:
2559 if VARIATION_DICT[i]:
2560 t = CANON_TO_NODE[i]
2561 if t.canon[0]
in variable_length_types:
2564 name=
Var(name=
"Hdf5Back::VLValToBuf"),
2565 args=[
Decl(type=
Type(cpp=
"const "+t.cpp+
"&"),
2566 name=
Var(name=
"x"))],
2568 block.nodes.append(node)
2569 output += CPPGEN.visit(block)
2573 """HDF5 VAL_TO_BUF_H: Generates header declarations for VLValToBuf function."""
2576 block =
Block(nodes=[])
2577 for i
in VARIATION_DICT:
2578 if VARIATION_DICT[i]:
2579 node = CANON_TO_NODE[i]
2580 if node.canon[0]
in variable_length_types:
2583 name=
Var(name=
"VLValToBuf"),
2588 name=
Raw(code=
"x"))])))
2589 block.nodes.append(decl)
2590 output += CPPGEN.visit(block)
2591 output =
indent(output, INDENT)
2595 """Representation of C++ VLRead function.
2602 Memory location of SHA1 hash.
2607 The final function call.
2610 targs=[
Raw(code=t.cpp),
2611 Raw(code=ORIGIN_TO_VL[t.canon].db)],
2612 args=[
Raw(code=offset)])
2616 """Representation of C++ reinterpret_cast function.
2621 C++ type to cast as.
2623 Memory location of the data to cast.
2624 deref : bool, optional
2625 Should the function be dereferenced? (This returns the newly casted
2626 data, rather than a pointer)
2631 The final function call.
2634 func_name =
'*reinterpret_cast'
2636 func_name =
'reinterpret_cast'
2638 targs=[
Raw(code=t.cpp+
'*')], args=[
Raw(code=offset)])
2641def to_val_body(t, vl_list, depth=0, prefix='', variable='x0', offset=None):
2642 """Generates the body of the VLBufToVal function.
2644 The VLBufToVal function is responsible for reading the bytes of a VL buffer
2645 back into a C++ value. Importantly, we assume that all types which have the
2646 capability of being VL *are* VL. When we encounter one of these types, we
2647 call VLRead, passing in the respective SHA1 hash value. Otherwise, we read
2648 in the fixed length number of bytes associated with the type.
2653 Node representing current C++ type
2655 Potentially dimensioned list of 1's and 0's, corresponding to each
2656 child type and whether it is variable length or not, respectively.
2657 depth : int, optional
2658 Current recursive depth, used for naming variables.
2659 prefix : str, optional
2660 Current variable prefix, used to ensure unique variable names.
2661 variable : str, optional
2662 Current container variable name.
2663 offset : str or None, optional
2664 Current offset into buffer.
2669 Nodes representing the body.
2671 block =
Block(nodes=[])
2675 args[
'var'] = variable
2676 total_item_size =
get_variable(
'total_item_size', depth=depth,
2678 count =
get_variable(
'count', depth=depth, prefix=prefix)
2681 offset =
'p+' +
"(" + total_item_size +
'*' + count +
")"
2682 container = t.canon[0]
2683 loop_block =
Block(nodes=[])
2684 for child, part, vl
in zip(t.canon[1:], template_args[t.canon[0]], vl_list):
2685 type_node = CANON_TO_NODE[child]
2686 child_var =
get_variable(
'x', depth=depth+1, prefix=prefix+part)
2689 child_arg =
'child' + str(child_count)
2694 target=
Var(name=child_var),
2701 target=
Var(name=child_var),
2711 name=
Var(name=child_var))))
2712 loop_block.nodes.append(
to_val_body(type_node, vl, depth=depth+1,
2715 variable=child_var))
2716 args[child_arg] = child_var
2717 offset +=
"+" + child_size
2722 container_expr = CONTAINER_INSERT_STRINGS[container].format(**args)
2723 if container
in variable_length_types:
2725 cpp=
'unsigned int'),
2726 target=
Var(name=count),
2727 value=
Raw(code=
'0'))))
2728 block.nodes.append(
For(cond=
BinOp(x=
Var(name=count), op=
'<',
2729 y=
Var(name=
'buf.len')),
2730 incr=
Raw(code=
'++'+count),
2734 block.nodes.append(loop_block)
2735 block.nodes.append(
ExprStmt(child=
Raw(code=container_expr)))
2737 block.nodes.append(
ExprStmt(child=
Raw(code=
'return ' + variable)))
2741 """HDF5 BUF_TO_VAL: Generates the VLBufToVal function code."""
2744 block =
Block(nodes=[])
2745 for i
in VARIATION_DICT:
2746 if VARIATION_DICT[i]:
2747 t = CANON_TO_NODE[i]
2748 if t.canon[0]
in variable_length_types:
2751 name=
Var(name=
"Hdf5Back::VLBufToVal"),
2752 targs=[
Raw(code=t.cpp)],
2753 args=[
Decl(type=
Type(cpp=
"const hvl_t&"),
2754 name=
Var(name=
"buf"))],
2757 block.nodes.append(node)
2758 output += CPPGEN.visit(block)
2763 global MAIN_DISPATCH
2765 with open(os.path.join(os.path.dirname(__file__),
'..',
'share',
2766 'dbtypes.json'))
as f:
2772 for row
in range(len(raw_table)):
2773 current = tuple(raw_table[row])
2774 if current[4] ==
"HDF5":
2775 if current[5] > version:
2776 version = current[5]
2778 if current[5] == version:
2781 types_table = list(tuple(row)
for row
in raw_table[table_start:table_end+1])
2783 for row
in types_table:
2784 if row[6] == 1
and row[4] ==
"HDF5" and row[5] == version:
2788 if canon
not in CANON_TYPES:
2789 CANON_TYPES.append(canon)
2791 CANON_TO_DB[canon] = db
2792 CANON_TO_NODE[canon] =
Type(cpp=cpp, db=db, canon=canon)
2793 DB_TO_VL[db] = row[8]
2795 fixed_length_types = []
2796 for n
in CANON_TYPES:
2797 if no_vl(CANON_TO_NODE[n])
and n
not in fixed_length_types:
2798 fixed_length_types.append(n)
2800 for n
in fixed_length_types:
2801 key = CANON_TO_NODE[n]
2803 for x
in CANON_TYPES:
2804 val_node = CANON_TO_NODE[x]
2805 if val_node.cpp == key.cpp
and val_node.db != key.db:
2806 vals.append(val_node)
2807 VARIATION_DICT[n] = vals
2809 VARIATION_DICT[
'BLOB'] = []
2810 VARIATION_DICT[
'STRING'] = [CANON_TO_NODE[
'VL_STRING']]
2812 for i
in VARIATION_DICT:
2814 if VARIATION_DICT[i] != []:
2815 for j
in VARIATION_DICT[i]:
2816 ORIGIN_DICT[j.canon] = i
2818 for i
in VARIATION_DICT.keys():
2819 node = CANON_TO_NODE[i]
2820 if DB_TO_VL[node.db]:
2823 if i[0]
not in variable_length_types:
2825 for j
in VARIATION_DICT[i]:
2826 NOT_VL.append(j.canon)
2827 if not VARIATION_DICT[i]:
2829 NOT_VL = set(NOT_VL)
2831 for n
in CANON_TYPES:
2832 node = CANON_TO_NODE[n]
2834 ORIGIN_TO_VL[ORIGIN_DICT[n]] = node
2836 MAIN_DISPATCH = {
"QUERY": main_query,
2837 "CREATE": main_create,
2838 "VL_DATASET": main_vl_dataset,
2839 "FILL_BUF": main_fill_buf,
2840 "WRITE": main_write,
2841 "VAL_TO_BUF_H": main_val_to_buf_h,
2842 "VAL_TO_BUF": main_val_to_buf,
2843 "BUF_TO_VAL": main_buf_to_val}
2847 gen_instruction = sys.argv[1]
2849 raise ValueError(
"No generation instruction provided")
2855 function = MAIN_DISPATCH[gen_instruction]
2858if __name__ ==
'__main__':
visit_exprstmt(self, node)
visit_funccall(self, node)
visit_nothing(self, node)
visit_declassign(self, node)
__init__(self, tree=None, indent=' ', debug=False)
visit_funcdef(self, node)
visit_leftunaryop(self, node)
visit_rightunaryop(self, node)
__init__(self, tree=None)
vl_write(t, variable, depth=0, prefix="", pointer=False)
a_cast(t, depth=0, prefix="")
get_write_setup(t, shape_array, depth=0, prefix="")
set_string_body(t, depth=0, prefix="", base_offset="buf+offset")
list_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
vl_string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
map_body(t, depth=0, prefix="", base_offset="buf+offset")
pad_children(t, variable, fixed_var=None, depth=0, prefix="", called_depth=0)
list_body(t, depth=0, prefix="", base_offset="buf+offset")
reinterpret_cast(t, offset, deref=False)
get_item_size(t, shape_array=None, vl_flag=False, depth=0)
get_setup(t, depth=0, prefix="", HDF5_type="tb_type", child_index='j')
to_from_buf_setup(t, depth=0, prefix="", spec=None)
indent(text, prefix, predicate=None)
hdf5_array_create(item_variable, rank=1, dims="&shape0")
write_body_string(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
write_body_primitive(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
vector_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
pair_body(t, depth=0, prefix="", base_offset="buf+offset")
vl_body(t, depth=0, prefix="", base_offset="buf+offset")
get_item_type(t, shape_array=None, vl_flag=False, prefix="", depth=0)
VL_ADD_BLOCK(t, item_var)
get_dim_shape(canon, start=0, depth=0)
set_primitive_body(t, depth=0, prefix="", base_offset="buf+offset")
vec_string_body(t, depth=0, prefix="", base_offset="buf+offset")
string_body(t, depth=0, prefix="", base_offset="buf+offset", variable=None)
string_setup(depth=0, prefix="")
print_statement(t, identifier)
write_body_uuid(t, depth=0, prefix="", variable=None, offset="buf", pointer=False)
vector_body(t, depth=0, prefix="", base_offset="buf+offset")
to_buf_body(t, vl_list, depth=0, prefix="", variable=None, offset="reinterpret_cast<char*>(buf.p)")
to_val_body(t, vl_list, depth=0, prefix='', variable='x0', offset=None)
convert_canonical(raw_list)
reinterpret_cast_body(t, depth=0, prefix="", base_offset="buf+offset")
vl_string_setup(depth=0, prefix="")
set_body(t, depth=0, prefix="", base_offset="buf+offset")
get_body(t, depth=0, prefix="", base_offset="buf+offset")
hdf5_insert(container_type, compound_var, types_sizes_dict)
get_decl(t, depth=0, prefix="")
get_variable(name, depth=0, prefix="")
get_prefix(base_prefix, parent_type, child_index)
get_write_body(t, shape_array, depth=0, prefix="", variable="a", offset="buf", pointer=False)
primitive_setup(t, depth=0, prefix="")
hdf5_create_compound(sizes)
uuid_body(t, depth=0, prefix="", base_offset="buf+offset")