Skip to content

Commit 7383539

Browse files
committed
DPL Analysis: add string expression parsing
* Fix binding node hash handling
* Add a tokenizer/parser to handle expressions given in string form
* Supported features:
  * column bindings; the fully qualified name is required, e.g. `o2::aod::track::pt`
  * arithmetic operations, all functions supported by normal expressions, conditional expressions, and parentheses
  * standard math constants (PI, PIHalf, etc.)
1 parent 50d26e3 commit 7383539

File tree

8 files changed

+977
-262
lines changed

8 files changed

+977
-262
lines changed

Framework/Core/include/Framework/ASoA.h

Lines changed: 192 additions & 193 deletions
Large diffs are not rendered by default.

Framework/Core/include/Framework/BasicOps.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
// or submit itself to any jurisdiction.
1111
#ifndef O2_FRAMEWORK_BASICOPS_H_
1212
#define O2_FRAMEWORK_BASICOPS_H_
13+
#include <array>
14+
#include <string_view>
15+
#include "CommonConstants/MathConstants.h"
1316

1417
namespace o2::framework
1518
{
@@ -46,6 +49,64 @@ enum BasicOp : unsigned int {
4649
BitwiseNot,
4750
Conditional // 3-ar functions
4851
};
52+
53+
/// Textual spelling of each BasicOp, indexed by the enumerator's numeric value.
/// The first 15 entries are infix operator symbols; the remaining ones are
/// function-style names (e.g. "npow", "ifnode").
/// The table is sized as BasicOp::Conditional + 1: supplying too many entries
/// is a compile error, while supplying too few leaves empty trailing views.
/// NOTE(review): the entry order is assumed to follow the BasicOp enumerator order — confirm.
/// Declared `inline` so that every translation unit including this header
/// shares a single table instead of carrying its own internal-linkage copy.
inline constexpr std::array<std::string_view, BasicOp::Conditional + 1> mapping{
  "&&",
  "||",
  "+",
  "-",
  "/",
  "*",
  "&",
  "|",
  "^",
  "<",
  "<=",
  ">",
  ">=",
  "==",
  "!=",
  "natan2",
  "npow",
  "nsqrt",
  "nexp",
  "nlog",
  "nlog10",
  "nsin",
  "ncos",
  "ntan",
  "nasin",
  "nacos",
  "natan",
  "nabs",
  "nround",
  "nbitwise_not",
  "ifnode"
};
86+
87+
/// Names of the math constants that can be referred to by name.
/// Entry i names the value stored at mathConstantsValues[i]; keep both tables
/// in the same order and of the same length.
/// Declared `inline` so that every translation unit including this header
/// shares a single table instead of carrying its own internal-linkage copy.
inline constexpr std::array<std::string_view, 9> mathConstants{
  "Almost0",
  "Epsilon",
  "Almost1",
  "VeryBig",
  "PI",
  "TwoPI",
  "PIHalf",
  "PIThird",
  "PIQuarter"
};
98+
99+
/// Values of the named math constants, stored as float.
/// mathConstantsValues[i] is the value for the name mathConstants[i]; keep both
/// tables in the same order and of the same length.
/// Declared `inline` so that every translation unit including this header
/// shares a single table instead of carrying its own internal-linkage copy.
inline constexpr std::array<float, 9> mathConstantsValues{
  o2::constants::math::Almost0,
  o2::constants::math::Epsilon,
  o2::constants::math::Almost1,
  o2::constants::math::VeryBig,
  o2::constants::math::PI,
  o2::constants::math::TwoPI,
  o2::constants::math::PIHalf,
  o2::constants::math::PIThird,
  o2::constants::math::PIQuarter
};
49110
} // namespace o2::framework
50111

51112
#endif // O2_FRAMEWORK_BASICOPS_H_

Framework/Core/include/Framework/ExpressionHelpers.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,26 @@ struct DatumSpec {
2525
size_t hash = 0;
2626
atype::type type = atype::NA;
2727

28-
explicit DatumSpec(size_t index, atype::type type_) : datum{index}, type{type_} {}
29-
explicit DatumSpec(LiteralNode::var_t literal, atype::type type_) : datum{literal}, type{type_} {}
30-
explicit DatumSpec(std::string binding, size_t hash_, atype::type type_) : datum{binding}, hash{hash_}, type{type_} {}
28+
explicit constexpr DatumSpec(size_t index, atype::type type_) : datum{index}, type{type_} {}
29+
explicit constexpr DatumSpec(LiteralNode::var_t literal, atype::type type_) : datum{literal}, type{type_} {}
30+
explicit constexpr DatumSpec(std::string binding, size_t hash_, atype::type type_) : datum{binding}, hash{hash_}, type{type_} {}
3131
DatumSpec() = default;
3232
DatumSpec(DatumSpec const&) = default;
3333
DatumSpec(DatumSpec&&) = default;
3434
DatumSpec& operator=(DatumSpec const&) = default;
3535
DatumSpec& operator=(DatumSpec&&) = default;
36-
};
3736

38-
bool operator==(DatumSpec const& lhs, DatumSpec const& rhs);
37+
/// Two specs are equal when their datum and type are equal and, for datum
/// alternative 3 only, their hashes are equal as well.
bool operator==(DatumSpec const& rhs) const
{
  if (!(this->datum == rhs.datum)) {
    return false;
  }
  // The hash takes part in the comparison only for datum alternative 3.
  // NOTE(review): presumably the string-binding alternative — confirm against the variant's declaration.
  if (this->datum.index() == 3 && this->hash != rhs.hash) {
    return false;
  }
  return this->type == rhs.type;
}
47+
};
3948

4049
std::ostream& operator<<(std::ostream& os, DatumSpec const& spec);
4150

Framework/Core/include/Framework/Expressions.h

Lines changed: 132 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -110,32 +110,43 @@ std::string upcastTo(atype::type f);
110110

111111
/// An expression tree node corresponding to a literal value
112112
struct LiteralNode {
113+
// Default construction yields an INT32 literal holding -1.
LiteralNode()
114+
: value{-1},
115+
type{atype::INT32}
116+
{
117+
118+
}
113119
// Wraps a value of any type T; the type tag is derived from T via selectArrowType<T>().
template <typename T>
114120
LiteralNode(T v) : value{v}, type{selectArrowType<T>()}
115121
{
116122
}
117123

118-
LiteralNode(LiteralNode const& other) = default;
119-
120124
using var_t = LiteralValue::stored_type;
121125
// Literal payload.
var_t value;
122126
// Type tag of the payload; both constructors shown here override the NA default.
atype::type type = atype::NA;
123127
};
124128

125129
/// An expression tree node corresponding to a column binding
126130
struct BindingNode {
131+
constexpr BindingNode()
132+
: name{nullptr},
133+
hash{0},
134+
type{atype::FLOAT}
135+
{
136+
}
127137
BindingNode(BindingNode const&) = default;
128138
BindingNode(BindingNode&&) = delete;
129139
constexpr BindingNode(const char* name_, uint32_t hash_, atype::type type_) : name{name_}, hash{hash_}, type{type_} {}
140+
constexpr BindingNode(uint32_t hash_, atype::type type_) : name{nullptr}, hash{hash_}, type{type_} {}
130141
const char* name;
131142
uint32_t hash;
132143
atype::type type;
133144
};
134145

135146
/// An expression tree node corresponding to binary or unary operation
136147
struct OpNode {
137-
OpNode(BasicOp op_) : op{op_} {}
138-
OpNode(OpNode const& other) = default;
148+
// Default construction selects BasicOp::Abs.
OpNode() : op{BasicOp::Abs} {}
149+
// Non-explicit, so a BasicOp converts implicitly to an OpNode.
OpNode(BasicOp op_) : op{op_} {}
139150
// Operation this node applies.
BasicOp op;
140151
};
141152

@@ -155,8 +166,6 @@ struct PlaceholderNode : LiteralNode {
155166
retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{static_cast<AT>(context.options().get<T>(name))}; };
156167
}
157168

158-
PlaceholderNode(PlaceholderNode const& other) = default;
159-
160169
void reset(InitContext& context)
161170
{
162171
value = retrieve(context, name.data());
@@ -180,8 +189,6 @@ struct ParameterNode : LiteralNode {
180189
{
181190
}
182191

183-
ParameterNode(ParameterNode const&) = default;
184-
185192
template <typename T>
186193
void reset(T value_, int index_ = -1)
187194
{
@@ -221,14 +228,19 @@ struct Node {
221228
{
222229
}
223230

224-
Node(Node&& n) : self{std::forward<self_t>(n.self)}, left{std::forward<std::unique_ptr<Node>>(n.left)}, right{std::forward<std::unique_ptr<Node>>(n.right)}, condition{std::forward<std::unique_ptr<Node>>(n.condition)}
231+
Node(Node&& n) : self{std::forward<self_t>(n.self)}, left{std::forward<std::unique_ptr<Node>>(n.left)}, right{std::forward<std::unique_ptr<Node>>(n.right)}, condition{std::forward<std::unique_ptr<Node>>(n.condition)}, binding{std::forward<std::string>(n.binding)}
225232
{
226233
}
227234

228235
Node(BindingNode const& n) : self{n}, left{nullptr}, right{nullptr}, condition{nullptr}
229236
{
230237
}
231238

239+
Node(BindingNode const& n, std::string binding_) : self{n}, left{nullptr}, right{nullptr}, condition{nullptr}, binding{binding_}
240+
{
241+
get<BindingNode>(self).name = binding.c_str();
242+
}
243+
232244
Node(ParameterNode&& p) : self{std::forward<ParameterNode>(p)}, left{nullptr}, right{nullptr}, condition{nullptr}
233245
{
234246
}
@@ -239,12 +251,24 @@ struct Node {
239251
right{std::make_unique<Node>(std::forward<Node>(else_))},
240252
condition{std::make_unique<Node>(std::forward<Node>(condition_))} {}
241253

254+
Node(ConditionalNode op, Node&& then_, std::unique_ptr<Node>&& else_, Node&& condition_)
255+
: self{op},
256+
left{std::make_unique<Node>(std::forward<Node>(then_))},
257+
right{std::forward<std::unique_ptr<Node>>(else_)},
258+
condition{std::make_unique<Node>(std::forward<Node>(condition_))} {}
259+
242260
Node(OpNode op, Node&& l, Node&& r)
243261
: self{op},
244262
left{std::make_unique<Node>(std::forward<Node>(l))},
245263
right{std::make_unique<Node>(std::forward<Node>(r))},
246264
condition{nullptr} {}
247265

266+
Node(OpNode op, std::unique_ptr<Node>&& l, Node&& r)
267+
: self{op},
268+
left{std::forward<std::unique_ptr<Node>>(l)},
269+
right{std::make_unique<Node>(std::forward<Node>(r))},
270+
condition{nullptr} {}
271+
248272
Node(OpNode op, Node&& l)
249273
: self{op},
250274
left{std::make_unique<Node>(std::forward<Node>(l))},
@@ -264,6 +288,10 @@ struct Node {
264288
if (other.condition != nullptr) {
265289
condition = std::make_unique<Node>(*other.condition);
266290
}
291+
binding = other.binding;
292+
if (!binding.empty()) {
293+
get<BindingNode>(self).name = binding.c_str();
294+
}
267295
}
268296

269297
/// variant with possible nodes
@@ -274,6 +302,9 @@ struct Node {
274302
std::unique_ptr<Node> left = nullptr;
275303
std::unique_ptr<Node> right = nullptr;
276304
std::unique_ptr<Node> condition = nullptr;
305+
306+
/// buffer for dynamic binding
307+
std::string binding;
277308
};
278309

279310
/// helper struct used to parse trees
@@ -315,20 +346,24 @@ void walk(Node* head, L&& pred)
315346
}
316347
}
317348

349+
/// helper concepts
350+
/// Satisfied exactly when std::is_arithmetic_v<T> is true, i.e. for the built-in integral and
/// floating-point types. Constrains the overloads that accept plain numbers as operands.
template <typename T>
351+
concept arithmetic = std::is_arithmetic_v<T>;
352+
318353
/// overloaded operators to build the tree from an expression
319354

320355
#define BINARY_OP_NODES(_operator_, _operation_) \
321356
inline Node operator _operator_(Node&& left, Node&& right) \
322357
{ \
323358
return Node{OpNode{BasicOp::_operation_}, std::forward<Node>(left), std::forward<Node>(right)}; \
324359
} \
325-
template <typename T> \
326-
inline Node operator _operator_(Node&& left, T right) requires(std::is_arithmetic_v<std::decay_t<T>>) \
360+
template <arithmetic T> \
361+
inline Node operator _operator_(Node&& left, T right) \
327362
{ \
328363
return Node{OpNode{BasicOp::_operation_}, std::forward<Node>(left), LiteralNode{right}}; \
329364
} \
330-
template <typename T> \
331-
inline Node operator _operator_(T left, Node&& right) requires(std::is_arithmetic_v<std::decay_t<T>>) \
365+
template <arithmetic T> \
366+
inline Node operator _operator_(T left, Node&& right) \
332367
{ \
333368
return Node{OpNode{BasicOp::_operation_}, LiteralNode{left}, std::forward<Node>(right)}; \
334369
} \
@@ -382,15 +417,15 @@ BINARY_OP_NODES(&&, LogicalAnd);
382417
BINARY_OP_NODES(||, LogicalOr);
383418

384419
/// functions
385-
/// Builds a BasicOp::Power node with the expression `left` as first operand and the plain
/// number `right`, wrapped in a LiteralNode, as second operand.
template <typename T>
386-
inline Node npow(Node&& left, T right) requires(std::is_arithmetic_v<T>)
420+
template <arithmetic T>
421+
inline Node npow(Node&& left, T right)
387422
{
388423
return Node{OpNode{BasicOp::Power}, std::forward<Node>(left), LiteralNode{right}};
389424
}
390425

391426
#define BINARY_FUNC_NODES(_func_, _node_) \
392-
template <typename L, typename R> \
393-
inline Node _node_(L left, R right) requires(std::is_arithmetic_v<L> && std::is_arithmetic_v<R>) \
427+
template <arithmetic L, arithmetic R> \
428+
inline Node _node_(L left, R right) \
394429
{ \
395430
return Node{OpNode{BasicOp::_func_}, LiteralNode{left}, LiteralNode{right}}; \
396431
} \
@@ -469,20 +504,20 @@ inline Node ifnode(Node&& condition_, Node&& then_, Node&& else_)
469504
return Node{ConditionalNode{}, std::forward<Node>(then_), std::forward<Node>(else_), std::forward<Node>(condition_)};
470505
}
471506

472-
/// Conditional node whose else-branch is a plain number, wrapped in a LiteralNode.
/// The Node constructor receives (then, else, condition), unlike this function's
/// (condition, then, else) parameter order.
template <typename L>
473-
inline Node ifnode(Node&& condition_, Node&& then_, L else_) requires(std::is_arithmetic_v<L>)
507+
template <arithmetic L>
508+
inline Node ifnode(Node&& condition_, Node&& then_, L else_)
474509
{
475510
return Node{ConditionalNode{}, std::forward<Node>(then_), LiteralNode{else_}, std::forward<Node>(condition_)};
476511
}
477512

478-
/// Conditional node whose then-branch is a plain number, wrapped in a LiteralNode.
/// The Node constructor receives (then, else, condition), unlike this function's
/// (condition, then, else) parameter order.
template <typename L>
479-
inline Node ifnode(Node&& condition_, L then_, Node&& else_) requires(std::is_arithmetic_v<L>)
513+
template <arithmetic L>
514+
inline Node ifnode(Node&& condition_, L then_, Node&& else_)
480515
{
481516
return Node{ConditionalNode{}, LiteralNode{then_}, std::forward<Node>(else_), std::forward<Node>(condition_)};
482517
}
483518

484-
/// Conditional node whose then- and else-branches are both plain numbers, each wrapped in a
/// LiteralNode. The Node constructor receives (then, else, condition), unlike this function's
/// (condition, then, else) parameter order.
template <typename L1, typename L2>
485-
inline Node ifnode(Node&& condition_, L1 then_, L2 else_) requires(std::is_arithmetic_v<L1>&& std::is_arithmetic_v<L2>)
519+
template <arithmetic L1, arithmetic L2>
520+
inline Node ifnode(Node&& condition_, L1 then_, L2 else_)
486521
{
487522
return Node{ConditionalNode{}, LiteralNode{then_}, LiteralNode{else_}, std::forward<Node>(condition_)};
488523
}
@@ -644,6 +679,80 @@ std::shared_ptr<gandiva::Projector> createProjectors(framework::pack<C...>, std:
644679
}
645680

646681
void updateFilterInfo(ExpressionInfo& info, std::shared_ptr<arrow::Table>& table);
682+
683+
/*
684+
* The formal grammar for framework expressions.
685+
* Operators are listed in order of increasing precedence: '||' binds loosest, '*' and '/' bind tightest.
686+
* Identifier includes namespaces, e.g. o2::aod::track::pt.
687+
*
688+
* top ::= primary
689+
*
690+
* primary ::= tier1 ('||' tier1)*
691+
* tier1 ::= tier2 ('&&' tier2)*
692+
* tier2 ::= tier3 ('|' tier3)*
693+
* tier3 ::= tier4 ('^' tier4)*
694+
* tier4 ::= tier5 ('&' tier5)*
695+
* tier5 ::= tier6 (('=='|'!=') tier6)*
696+
* tier6 ::= tier7 (('<'|'>'|'<='|'>=') tier7)*
697+
* tier7 ::= tier8 (('+'|'-') tier8)*
698+
* tier8 ::= base (('*'|'/') base)*
699+
*
700+
* base ::= identifier
701+
* | number
702+
* | function_call
703+
* | '(' primary ')'
704+
*
705+
* number ::= -?[0-9]+(\.[0-9]*)?([uf])?
706+
* identifier ::= [a-zA-Z][a-zA-Z0-9_]* ('::' [a-zA-Z][a-zA-Z0-9_]*)*
707+
* function_call ::= identifier '(' (primary (',' primary)*)? ')'
708+
*/
709+
710+
/// String parsing
711+
/// Token kinds used by the Tokenizer; every enumerator has a negative value.
/// NOTE(review): presumably negative so that non-negative token values can carry raw
/// character codes (Tokenizer::currentToken is a plain int) — confirm in the implementation.
enum Token : int {
712+
// NOTE(review): presumably marks the end of the input — confirm.
EoL = -1,
713+
// Corresponds to the grammar's `identifier` rule.
Identifier = -2,
714+
// The grammar's `number` rule is split into an integer and a floating-point token kind.
IntegerNumber = -3,
715+
FloatNumber = -4,
716+
BinaryOp = -5,
717+
// Also the initial value of Tokenizer::currentToken.
Unexpected = -100
718+
};
719+
720+
struct Tokenizer {
721+
std::string source;
722+
std::string::iterator current;
723+
std::string IdentifierStr;
724+
std::string BinaryOpStr;
725+
std::string StrValue;
726+
std::string TokenStr;
727+
std::variant<uint32_t, int32_t, uint64_t, int64_t> IntegerValue;
728+
std::variant<float, double> FloatValue;
729+
char LastChar;
730+
int currentToken = Token::Unexpected;
731+
732+
Tokenizer(std::string const& input = "");
733+
void reset(std::string const& input);
734+
[[maybe_unused]] int nextToken();
735+
void pop();
736+
char peek();
737+
};
738+
739+
/// Static entry points that turn a string expression into a Node tree.
/// NOTE(review): the parsePrimary/parseTier1..parseTier8/parseBase names mirror the
/// primary/tier1..tier8/base rules of the grammar comment in this header; presumably there is
/// one function per rule — confirm in the implementation.
struct Parser
740+
{
741+
// Entry point: takes the whole expression text and returns the resulting tree by value.
static Node parse(std::string const& input);
742+
// Each of the functions below reads from the given tokenizer and returns an owned subtree.
static std::unique_ptr<Node> parsePrimary(Tokenizer & tk);
743+
static std::unique_ptr<Node> parseTier1(Tokenizer& tk);
744+
static std::unique_ptr<Node> parseTier2(Tokenizer& tk);
745+
static std::unique_ptr<Node> parseTier3(Tokenizer& tk);
746+
static std::unique_ptr<Node> parseTier4(Tokenizer& tk);
747+
static std::unique_ptr<Node> parseTier5(Tokenizer& tk);
748+
static std::unique_ptr<Node> parseTier6(Tokenizer& tk);
749+
static std::unique_ptr<Node> parseTier7(Tokenizer& tk);
750+
static std::unique_ptr<Node> parseTier8(Tokenizer& tk);
751+
static std::unique_ptr<Node> parseBase(Tokenizer& tk);
752+
753+
// Maps the text of a token to the OpNode it denotes.
static OpNode opFromToken(std::string const& token);
754+
};
755+
647756
} // namespace o2::framework::expressions
648757

649758
#endif // O2_FRAMEWORK_EXPRESSIONS_H_

0 commit comments

Comments
 (0)