Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions src/Query/ADQL_parser.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,11 @@ struct ADQL_parser : boost::spirit::qi::grammar<std::string::const_iterator,
column_name, ordering_specification, set_function_type, table_name,
tap_upload, tap_upload_identifier, unqualified_schema_name, catalog_name,
correlation_name, qualifier, trig_one_arg_names, math_zero_arg_names,
math_one_arg_names, math_two_arg_names, cast_as, user_defined_function_name,
math_one_arg_names, math_two_arg_names, cast_as, whitelisted_function_name,
unsigned_literal, unsigned_value_specification, general_literal,
null_literal, null_cast, boolean_literal, qualifier_star, binary_operators,
geo_one_arg_names, geo_two_arg_names, with_table_name, with_column_name,
possibly_qualified_identifier, table_valued_function_name,
table_valued_function_param, sql_no_arg_function;
possibly_qualified_identifier, sql_no_arg_function;

// rules with skipper
boost::spirit::qi::rule<std::string::const_iterator, std::string(),
Expand Down Expand Up @@ -236,13 +235,13 @@ struct ADQL_parser : boost::spirit::qi::grammar<std::string::const_iterator,
boost::spirit::ascii::space_type>
position_function;

boost::spirit::qi::rule<std::string::const_iterator, ADQL::User_Defined_Function(),
boost::spirit::qi::rule<std::string::const_iterator, ADQL::Whitelisted_Function(),
boost::spirit::ascii::space_type>
user_defined_function;
whitelisted_function;

boost::spirit::qi::rule<std::string::const_iterator, ADQL::Value_Expression(),
boost::spirit::ascii::space_type>
value_expression, user_defined_function_param, pattern;
value_expression, whitelisted_function_param, pattern;

boost::spirit::qi::rule<std::string::const_iterator,
ADQL::Value_Expression_Non_Bool_Term(),
Expand Down Expand Up @@ -348,10 +347,6 @@ struct ADQL_parser : boost::spirit::qi::grammar<std::string::const_iterator,
boost::spirit::ascii::space_type>
table_reference;

boost::spirit::qi::rule<std::string::const_iterator, ADQL::Table_Valued_Function(),
boost::spirit::ascii::space_type>
table_valued_function;

boost::spirit::qi::rule<std::string::const_iterator,
std::vector<ADQL::Table_Reference>(),
boost::spirit::ascii::space_type>
Expand Down
149 changes: 116 additions & 33 deletions src/Query/ADQL_parser/ADQL_parser/init_factor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@ void ADQL_parser::init_factor() {
using boost::spirit::qi::digit;
using boost::spirit::qi::double_;
using boost::spirit::qi::hold;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
using boost::spirit::qi::lexeme;
using boost::spirit::qi::lit;
using boost::spirit::qi::lower;
using boost::spirit::qi::no_skip;
using boost::spirit::qi::omit;
using boost::spirit::qi::print;
using boost::spirit::qi::ulong_long;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
namespace ascii = boost::spirit::ascii;

set_function_type %= ascii::no_case[ascii::string("AVG")] |
Expand Down Expand Up @@ -62,8 +62,8 @@ void ADQL_parser::init_factor() {
ascii::no_case["ELSE"] >> &no_skip[boost::spirit::qi::space] >> result;
else_clause.name("else_clause");

/// boost::spirit gets wonky if I try to use the '>' operator for
/// simple_whens
// boost::spirit gets wonky if I try to use the '>' operator for
// simple_whens
simple_case %=
value_expression >> simple_whens >> -else_clause > ascii::no_case["END"];
simple_case.name("simple_case");
Expand Down Expand Up @@ -124,33 +124,115 @@ void ADQL_parser::init_factor() {
*array_index;
value_expression_primary.name("value_expression_primary");

/// Custom array_expression so that SQL 99 array literals can pass
/// through
// Custom array_expression so that SQL 99 array literals can pass
// through
array_constructor %=
ascii::no_case["ARRAY"] >> '[' >> (value_expression % ',') > ']';
array_constructor.name("array_constructor");

/// We do not have a rule for default_function_prefix since, being
/// optional, it does not change whether something parses.

/// Add a bunch of functions that are normally reserved words, but
/// also really useful string functions (at least in Postgres)
user_defined_function_name %= regular_identifier |
ascii::no_case[ascii::string("RIGHT")] |
ascii::no_case[ascii::string("LEFT")] |
ascii::no_case[ascii::string("UPPER")] |
ascii::no_case[ascii::string("LOWER")] |
ascii::no_case[ascii::string("DISTINCT")] |
ascii::no_case[ascii::string("TRIM")];
user_defined_function_name.name("user_defined_function_name");

user_defined_function_param %= value_expression;
user_defined_function_param.name("user_defined_function_param");

user_defined_function %= hold[user_defined_function_name >> '('] >>
-(user_defined_function_param % ',') >> ')';

user_defined_function.name("user_defined_function");
// Reverse-sorted within groups, as in init_reserved_words(), to
// prevent early matches. Could be reverse-sorted in one big group
// if necessary.

// Note: Despite their potential for misuse in blind injection
// attacks, the functions SUBSTR, SUBSTRING, and INSTR are
// whitelisted because they have legitimate uses in TAP
// queries. The functions ASCII() and CHR(), though, are omitted
// from the whitelist as of 22May26.

whitelisted_function_name %=
// IRSA UDFs
ascii::no_case[ascii::string("STRIP_URL_PREFIX")] |
ascii::no_case[ascii::string("SIA2_CLOUD_ACCESS_COLUMN")] |
ascii::no_case[ascii::string("SIA1_CLOUD_ACCESS_COLUMN")] |
ascii::no_case[ascii::string("RA_TO_SEXAGESIMAL")] |
ascii::no_case[ascii::string("PT_TO_REGION")] |
ascii::no_case[ascii::string("POLY_TO_REGION")] |
ascii::no_case[ascii::string("POLY_TO_RA")] |
ascii::no_case[ascii::string("POLY_TO_DEC")] |
ascii::no_case[ascii::string("GET_MOCS")] |
ascii::no_case[ascii::string("GET_CONTENTTYPE_SORT_SURROGATE")] |
ascii::no_case[ascii::string("EXTRACT_URL_BASENAME")] |
ascii::no_case[ascii::string("DEC_TO_SEXAGESIMAL")] |

// PostgreSQL/SQL functions
ascii::no_case[ascii::string("TYPEOF")] |
ascii::no_case[ascii::string("TO_TIMESTAMP")] |
ascii::no_case[ascii::string("TO_NUMBER")] |
ascii::no_case[ascii::string("TO_DATE")] |
ascii::no_case[ascii::string("TO_CHAR")] |
ascii::no_case[ascii::string("TIMEZONE")] |
ascii::no_case[ascii::string("SYSDATE")] |
ascii::no_case[ascii::string("SUBSTRING")] |
ascii::no_case[ascii::string("SUBSTR")] |
ascii::no_case[ascii::string("STRPOS")] |
ascii::no_case[ascii::string("STRING_AGG")] |
ascii::no_case[ascii::string("STDDEV")] |
ascii::no_case[ascii::string("SPLIT_PART")] |
ascii::no_case[ascii::string("REPLACE")] |
ascii::no_case[ascii::string("RANDOM")] |
ascii::no_case[ascii::string("NOW")] |
ascii::no_case[ascii::string("MEDIAN")] |
ascii::no_case[ascii::string("MD5")] | ascii::no_case[ascii::string("LN")] |
ascii::no_case[ascii::string("LENGTH")] |
ascii::no_case[ascii::string("LEAST")] |
ascii::no_case[ascii::string("JSONB_EXTRACT_PATH_TEXT")] |
ascii::no_case[ascii::string("JSON_EXTRACT_PATH_TEXT")] |
ascii::no_case[ascii::string("INSTR")] |
ascii::no_case[ascii::string("GREATEST")] |
ascii::no_case[ascii::string("GETDATE")] |
ascii::no_case[ascii::string("FORMAT")] |
ascii::no_case[ascii::string("FLOOR")] |
ascii::no_case[ascii::string("CONCAT")] |
ascii::no_case[ascii::string("CLOCK_TIMESTAMP")] |
ascii::no_case[ascii::string("CHAR_LENGTH")] |
ascii::no_case[ascii::string("CEILING")] |
ascii::no_case[ascii::string("CEIL")] |

// PostGIS type constructors
ascii::no_case[ascii::string("GEOMETRY")] |
ascii::no_case[ascii::string("GEOGRAPHY")] |

// ADQL reserved words that are also valid function names
ascii::no_case[ascii::string("SQRT")] |
ascii::no_case[ascii::string("ROUND")] |
ascii::no_case[ascii::string("POWER")] |
ascii::no_case[ascii::string("MOD")] |
ascii::no_case[ascii::string("LOG10")] |
ascii::no_case[ascii::string("LOG")] |
ascii::no_case[ascii::string("EXP")] |
ascii::no_case[ascii::string("COORD2")] |
ascii::no_case[ascii::string("COORD1")] |
ascii::no_case[ascii::string("ABS")] |

// SQL reserved words that are also valid function names
ascii::no_case[ascii::string("UPPER")] |
ascii::no_case[ascii::string("TRIM")] |
ascii::no_case[ascii::string("SUM")] |
ascii::no_case[ascii::string("RIGHT")] |
ascii::no_case[ascii::string("LOWER")] |
ascii::no_case[ascii::string("LEFT")] |
ascii::no_case[ascii::string("DISTINCT")] |
ascii::no_case[ascii::string("CAST")] |

// ST_ prefix — PostGIS functions
(ascii::no_case[ascii::string("ST_")] >> all_identifiers) |

// ivo_ prefix — IVOA functions
(ascii::no_case[ascii::string("ivo_")] >> all_identifiers) |

// q3c_ prefix — Q3C spatial indexing functions
(ascii::no_case[ascii::string("q3c_")] >> all_identifiers);

whitelisted_function_name.name("whitelisted_function_name");

whitelisted_function_param %= value_expression;
whitelisted_function_param.name("whitelisted_function_param");

whitelisted_function %= hold[whitelisted_function_name >> '('] >>
-(whitelisted_function_param % ',') >> ')';

whitelisted_function.name("whitelisted_function");

sql_no_arg_function %= ascii::no_case[ascii::string("CURRENT_TIMESTAMP")];

Expand Down Expand Up @@ -182,7 +264,8 @@ void ADQL_parser::init_factor() {
// numeric_geometry_function
numeric_value_function %= trig_function | math_function | cast_function |
position_function | non_predicate_geometry_function |
user_defined_function | sql_no_arg_function;
whitelisted_function | sql_no_arg_function;

numeric_value_function.name("numeric_value_function");
// Flipped the order here, because a value_expression can match a
// function name.
Expand Down Expand Up @@ -216,9 +299,9 @@ void ADQL_parser::init_factor() {
BOOST_SPIRIT_DEBUG_NODE(null_cast);
BOOST_SPIRIT_DEBUG_NODE(value_expression_primary);
BOOST_SPIRIT_DEBUG_NODE(array_constructor);
BOOST_SPIRIT_DEBUG_NODE(user_defined_function_name);
BOOST_SPIRIT_DEBUG_NODE(user_defined_function_param);
BOOST_SPIRIT_DEBUG_NODE(user_defined_function);
BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_name);
BOOST_SPIRIT_DEBUG_NODE(whitelisted_function_param);
BOOST_SPIRIT_DEBUG_NODE(whitelisted_function);
BOOST_SPIRIT_DEBUG_NODE(cast_function);
BOOST_SPIRIT_DEBUG_NODE(position_function);
BOOST_SPIRIT_DEBUG_NODE(numeric_value_function);
Expand Down
22 changes: 5 additions & 17 deletions src/Query/ADQL_parser/ADQL_parser/init_predicate.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,24 @@ void ADQL_parser::init_predicate() {
using boost::spirit::qi::digit;
using boost::spirit::qi::double_;
using boost::spirit::qi::hold;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
using boost::spirit::qi::lexeme;
using boost::spirit::qi::lit;
using boost::spirit::qi::lower;
using boost::spirit::qi::no_skip;
using boost::spirit::qi::omit;
using boost::spirit::qi::print;
using boost::spirit::qi::ulong_long;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
namespace ascii = boost::spirit::ascii;

derived_correlation %= subquery >> correlation_specification;
derived_correlation.name("derived correlation");

table_valued_function_name %= possibly_qualified_identifier;
table_valued_function_name.name("table_valued_function_name");

table_valued_function_param %= possibly_qualified_identifier;
table_valued_function_param.name("table_valued_function_param");

table_valued_function %= hold[lexeme[ascii::no_case["table("] >>
table_valued_function_name >> "('"]] >>
-(table_valued_function_param % "','") >> "')" >> ')';
table_valued_function.name("table_valued_function");
table_reference %= joined_table | table_correlation | derived_correlation;

table_reference %= joined_table | table_correlation | derived_correlation |
table_valued_function;
table_reference.name("table reference");

from_clause %= lexeme[ascii::no_case["FROM"] > &boost::spirit::qi::space] >
Expand Down Expand Up @@ -88,7 +77,6 @@ void ADQL_parser::init_predicate() {
null_predicate | like_predicate | exists_predicate;

#ifdef DEBUG_PRED
BOOST_SPIRIT_DEBUG_NODE(table_valued_function);
BOOST_SPIRIT_DEBUG_NODE(derived_correlation);
BOOST_SPIRIT_DEBUG_NODE(table_reference);
BOOST_SPIRIT_DEBUG_NODE(from_clause);
Expand Down
2 changes: 1 addition & 1 deletion src/Query/ADQL_parser/ADQL_parser/init_query.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void ADQL_parser::init_query() {
search_condition;
having.name("having");

sort_key %= case_expression | user_defined_function | column_reference |
sort_key %= case_expression | whitelisted_function | column_reference |
unsigned_integer;

ordering_specification %= ascii::no_case[ascii::string("ASC")] |
Expand Down
12 changes: 6 additions & 6 deletions src/Query/ADQL_parser/ADQL_parser/init_reserved_words.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ void ADQL_parser::init_reserved_words() {
using boost::spirit::qi::digit;
using boost::spirit::qi::double_;
using boost::spirit::qi::hold;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
using boost::spirit::qi::lexeme;
using boost::spirit::qi::lit;
using boost::spirit::qi::lower;
using boost::spirit::qi::no_skip;
using boost::spirit::qi::omit;
using boost::spirit::qi::print;
using boost::spirit::qi::ulong_long;
using boost::spirit::qi::labels::_1;
using boost::spirit::qi::labels::_2;
using boost::spirit::qi::labels::_3;
using boost::spirit::qi::labels::_val;
namespace ascii = boost::spirit::ascii;

/// Reverse sort to avoid early matches.
// Reverse sort to avoid early matches.
ADQL_reserved_word %=
ascii::no_case["TRUNCATE"] | ascii::no_case["TOP"] |
ascii::no_case["TAP_UPLOAD"] | ascii::no_case["TAN"] |
Expand All @@ -41,7 +41,7 @@ void ADQL_parser::init_reserved_words() {
ascii::no_case["ATAN2"] | ascii::no_case["ATAN"] | ascii::no_case["ASIN"] |
ascii::no_case["AREA"] | ascii::no_case["ACOS"] | ascii::no_case["ABS"];

/// Split up SQL_reserved_word to help memory usage and compile times.
// Split up SQL_reserved_word to help memory usage and compile times.
SQL_reserved_word_00 %= ascii::no_case["ZONE"] | ascii::no_case["YEAR"] |
ascii::no_case["WRITE"] | ascii::no_case["WORK"] |
ascii::no_case["WITH"] | ascii::no_case["WHERE"] |
Expand Down
7 changes: 1 addition & 6 deletions src/Query/Query_Preprocessor/Top_Level_Components.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
#include <string>
#include <vector>

// A general ADQL query can be split into 3 parts as follows:

// A general ADQL query supported by LibADQL can be split into 3 parts as follows:

// (1) An optional WITH clause

Expand All @@ -15,10 +14,8 @@

// (3) Optional HAVING, GROUP BY, and/or ORDER BY clauses.


// For example:


// "WITH tempTable (avgDist) AS (SELECT avg(dist) FROM distTable) "

// "SELECT table_name, dist FROM distTable, tempTable WHERE distTable.dist > "
Expand All @@ -29,8 +26,6 @@

// " ORDER BY dist "



// In what follows, "SFW" represents a SELECT...FROM...[WHERE...] string.

// The components of the Top_Level_Components class correspond to
Expand Down
10 changes: 8 additions & 2 deletions src/Query/Query_Preprocessor/Top_Level_Parser.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@

#include "Top_Level_Components.hxx"

// This parser parses a general ADQL query into the 3 components
// described in Top_Level_Components.hxx, thus making the query's
// This parser parses ADQL queries into the 3 components
// described in Top_Level_Components.hxx, thus making the queries'
// select_from_where strings accessible to the Query_Preprocessor.

// Note: Queries using WITHIN GROUP (...) syntax (e.g. LISTAGG,
// PERCENTILE_CONT) will fail because Top_Level_Parser stops at the
// bare GROUP keyword expecting a trailing GROUP BY clause. Supporting
// WITHIN GROUP would require changes to how trailing clauses are
// detected.

namespace ADQL {

struct Top_Level_Parser
Expand Down
Loading