Source for file RdqlParser.php

Documentation is available at RdqlParser.php

  1. <?php
  2.  
  3. // ----------------------------------------------------------------------------------
  4. // Class: RdqlParser
  5. // ----------------------------------------------------------------------------------
  6.  
  7.  
  8.  
  9. /**
  10. * This class contains methods for parsing an Rdql query string into PHP variables.
  11. * The output of the RdqlParser is an array with variables and constraints
  12. * of each query clause (Select, From, Where, And, Using).
  13. * To perform an RDQL query this array has to be passed to the RdqlEngine.
  14. *
  15. * <BR><BR>History:<UL>
  16. * <LI>05-12-2004 : Support for unquoted QNames added.
  17. * However, backward compatibility is provided, that means,
  18. * both prefix:local_name and <prefix:local_name> are allowed.
  19. * In the case of unquoted QNames, the parser now also checks
  20. * if a prefix is defined in the USING clause.
  21. * changes: - the structure of the class variable $parsedQuery
  22. * - methods: parseFrom(), parseExpressions(), replaceNamespacePrefixes(),
  23. * _validateVarUri(), _validateVarUriLiteral(), _validateUri(),
  24. * _validateLiteral(), _validatePrefix()
  25. * methods added: _validateQName(), _validateNCName(), _replaceNamespacePrefix()
  26. * : Bug in the handling of empty Literals fixed.</LI>
  27. * <LI>07-27-2003 : First release of this class</LI>
  28. *
  29. * @version V0.9.1
  30. * @author Radoslaw Oldakowski <radol@gmx.de>
  31. *
  32. * @package rdql
  33. * @access public
  34. */
  35.  
  36.  
  37. Class RdqlParser extends Object{
  38.  
  39. /**
  40. * Parsed query variables and constraints.
  41. * { } are only used within the parser class and are not returned as parsed query.
  42. * ( [] stands for an integer index - 0..N )
  43. *
  44. * @var array ['selectVars'][] = ?VARNAME
  45. * ['sources'][]{['value']} = URI | QName
  46. * {['is_qname'] = boolean}
  47. * ['patterns'][]['subject']['value'] = VARorURI
  48. * {['is_qname'] = boolean}
  49. * ['predicate']['value'] = VARorURI
  50. * {['is_qname'] = boolean}
  51. * ['object']['value'] = VARorURIorLiteral
  52. * {['is_qname'] = boolean}
  53. * ['is_literal'] = boolean
  54. * ['l_lang'] = string
  55. * ['l_dtype'] = string
  56. * {['l_dtype_is_qname'] = boolean}
  57. * ['filters'][]['string'] = string
  58. * ['evalFilterStr'] = string
  59. * ['regexEqExprs'][]['var'] = ?VARNAME
  60. * ['operator'] = (eq | ne)
  61. * ['regex'] = string
  62. * ['strEqExprs'][]['var'] = ?VARNAME
  63. * ['operator'] = (eq | ne)
  64. * ['value'] = string
  65. * ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
  66. * ['value_lang'] = string
  67. * ['value_dtype'] = string
  68. * {['value_dtype_is_qname'] = boolean}
  69. * ['numExprVars'][] = ?VARNAME
  70. * {['ns'][PREFIX] = NAMESPACE}
  71. * @access private
  72. */
  73. var $parsedQuery;
  74.  
  75.  
  76. /**
  77. * Query string divided into a sequence of tokens.
  78. * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
  79. * or a string containing any characters except from the above.
  80. *
  81. * @var array
  82. * @access private
  83. */
  84. var $tokens;
  85.  
  86.  
  /**
   * Parse the given RDQL query string and return an array with query variables and constraints.
   *
   * Steps: strip comments, tokenize, parse all clauses (starting with SELECT),
   * resolve "SELECT *" into the variables used in the WHERE clause (or check the
   * explicitly selected variables), and finally replace namespace prefixes.
   *
   * The parser state of a previous call is discarded first, because the clause
   * parsers only ever append to $this->parsedQuery; without the reset a second
   * call on the same instance would mix the results of both queries.
   * Note: the result is returned by reference, so a reference obtained from an
   * earlier call is overwritten by the next call.
   *
   * @param   string  $queryString
   * @return  array   $this->parsedQuery
   * @access  public
   */
  function & parseQuery($queryString) {

      // start from a clean state so that one instance can parse several queries
      $this->parsedQuery = array();
      $this->tokens = array();

      $cleanQueryString = $this->removeComments($queryString);
      $this->tokenize($cleanQueryString);
      $this->startParsing();

      // guard the index: it is only present if the SELECT clause was parsed successfully
      if (isset($this->parsedQuery['selectVars'][0]) && $this->parsedQuery['selectVars'][0] == '*')
          $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
      else
          $this->_checkSelectVars();
      $this->replaceNamespacePrefixes();

      return $this->parsedQuery;
  }
  107.  
  108.  
  /**
   * Remove comments from the passed query string.
   *
   * Text inside 'literals' / "literals" (including a trailing @lang and ^^datatype)
   * and inside <URIs> is copied verbatim and never searched for comments.
   * A "// comment" (up to, and including, the terminating line break) is replaced
   * by a single blank; a slash-star comment is removed without replacement.
   *
   * @param   string  $query
   * @return  string
   * @throws  PHPError  if a slash-star comment is not terminated
   * @access  private
   */
  function removeComments($query) {
      $last = strlen($query) - 1;
      // two padding blanks keep the $i+1 / $i+2 look-aheads inside the string
      $query .= '  ';
      $clean = '';
      for ($i = 0; $i <= $last; $i++) {
          // don't search for comments inside a 'literal'@lang^^dtype or "literal"@lang^^dtype
          if ($query[$i] == "'" || $query[$i] == '"') {
              $quotMark = $query[$i];
              do
                  $clean .= $query[$i++];
              while ($i < $last && $query[$i] != $quotMark);
              $clean .= $query[$i];
              // language
              if ($query[$i + 1] == '@') {
                  do {
                      // leave the datatype to the next step
                      if ($query[$i + 1] == '^' && $query[$i + 2] == '^')
                          break;
                      $clean .= $query[++$i];
                  } while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                           && $query[$i] != "\n" && $query[$i] != "\r");
              }
              // datatype
              if ($query[$i + 1] == '^' && $query[$i + 2] == '^') {
                  do
                      $clean .= $query[++$i];
                  while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                         && $query[$i] != "\n" && $query[$i] != "\r");
              }
          // don't search for comments inside an <URI> either
          } elseif ($query[$i] == '<') {
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != '>');
              $clean .= $query[$i];
          } elseif ($query[$i] == '/') {
              // clear: // comment
              if ($i < $last && $query[$i + 1] == '/') {
                  while ($i < $last && $query[$i] != "\n" && $query[$i] != "\r")
                      ++$i;
                  $clean .= ' ';
              // clear: /*comment*/
              } elseif ($i < $last - 2 && $query[$i + 1] == '*') {
                  $i += 2;
                  while ($i < $last && ($query[$i] != '*' || $query[$i + 1] != '/'))
                      ++$i;
                  if ($i >= $last && ($query[$last - 1] != '*' || $query[$last] != '/'))
                      trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
                  // skip the '*'; the closing '/' is skipped by the loop increment
                  ++$i;
              } else
                  $clean .= $query[$i];
          } else
              $clean .= $query[$i];
      }
      return $clean;
  }
  172.  
  173.  
  /**
   * Divide the query string into tokens and store them in $this->tokens.
   * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
   * or a string containing any character except from the above.
   *
   * The token list is rebuilt from scratch on every call. If the (trimmed)
   * query ends with a special character, the list ends with an empty string token.
   *
   * @param   string  $queryString
   * @access  private
   */
  function tokenize($queryString) {

      $queryString = trim($queryString, " \r\n\t");
      $specialChars = array(" ", "\t", "\r", "\n", ",", "(", ")");
      $len = strlen($queryString);
      // reset: leftovers of a previous run must not end up in front of the new tokens
      $this->tokens = array('');
      $n = 0;

      for ($i = 0; $i < $len; ++$i) {
          $char = $queryString[$i];
          if (!in_array($char, $specialChars, true))
              $this->tokens[$n] .= $char;
          else {
              // close the word collected so far (if any) before storing the special character
              if ($this->tokens[$n] !== '')
                  ++$n;
              $this->tokens[$n] = $char;
              $this->tokens[++$n] = '';
          }
      }
  }
  201.  
  202.  
  /**
   * Entry point of the clause parsing.
   * Hands over to parseSelect(), which is called on the tokenized query string.
   *
   * @access  private
   */
  function startParsing() {
      $this->parseSelect();
  }
  212.  
  213.  
  /**
   * Parse the SELECT clause of an Rdql query.
   * Accepts either "SELECT *" or "SELECT ?var (, ?var)*" and stores the result
   * in $this->parsedQuery['selectVars'].
   * When the parsing of the SELECT clause is finished, this method will call
   * a suitable method to parse the subsequent clause (parseFrom() or parseWhere()).
   *
   * @throws  PhpError
   * @access  private
   */
  function parseSelect() {

      $this->_clearWhiteSpaces();

      // Check if the queryString contains a "SELECT" token
      if (strcasecmp('SELECT', current($this->tokens)))
          trigger_error(RDQL_SEL_ERR ."'" .current($this->tokens)
                        ."' - SELECT keyword expected", E_USER_ERROR);
      unset($this->tokens[key($this->tokens)]);
      $this->_clearWhiteSpaces();

      // Parse SELECT *
      if (current($this->tokens) == '*') {
          unset($this->tokens[key($this->tokens)]);
          $this->parsedQuery['selectVars'][0] = '*';
          $this->_clearWhiteSpaces();
          if (strcasecmp('FROM', current($this->tokens))
              && strcasecmp('SOURCE', current($this->tokens))
              && strcasecmp('WHERE', current($this->tokens)))
              trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
                            ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
      }

      // Parse SELECT ?Var (, ?Var)*
      $commaExpected = FALSE;   // TRUE right after a variable
      $comma = FALSE;           // TRUE while the last significant token was a comma
      while (current($this->tokens) != NULL) {
          $k = key($this->tokens);
          $token = $this->tokens[$k];

          switch ($token) {
              case ',':
                  if (!$commaExpected)
                      trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  break;
              case '(':
              case ')':
                  trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
                  break;
              default :
                  if (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
                      if ($comma)
                          trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                      unset($this->tokens[$k]);
                      return $this->parseFrom();
                  } elseif (!strcasecmp('WHERE', $token)) {
                      // a comma directly in front of WHERE is reported as such
                      if ($comma)
                          trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                      unset($this->tokens[$k]);
                      return $this->parseWhere();
                  }
                  // the loop condition guarantees a non-empty token here
                  if ($token[0] == '?') {
                      $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
                      $commaExpected = TRUE;
                      $comma = FALSE;
                  } else
                      trigger_error(RDQL_SEL_ERR ." '$token' - '?' missing", E_USER_ERROR);
          }
          unset($this->tokens[$k]);
          $this->_clearWhiteSpaces();
      }
      // ran out of tokens without meeting FROM/SOURCE/WHERE
      trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
  }
  285.  
  286.  
  /**
   * Parse the FROM/SOURCE clause of an Rdql query.
   * Every source is stored in $this->parsedQuery['sources'][]['value']; sources
   * that are not written as <URI> are additionally flagged with ['is_qname'].
   * When the parsing of this clause is finished, parseWhere() will be called.
   *
   * @throws  PhpError
   * @access  private
   */
  function parseFrom() {

      $comma = FALSE;
      $commaExpected = FALSE;
      $i = -1;
      while (current($this->tokens) != NULL) {

          $this->_clearWhiteSpaces();
          // WHERE only ends the clause once at least one source has been read
          if (!strcasecmp('WHERE', current($this->tokens)) && !empty($this->parsedQuery['sources'])) {
              if ($comma)
                  trigger_error(RDQL_SRC_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseWhere();
          }
          if (current($this->tokens) == ',') {
              if ($commaExpected) {
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  unset($this->tokens[key($this->tokens)]);
              } else
                  trigger_error(RDQL_SRC_ERR ."',' - unexpected comma", E_USER_ERROR);
          } else {
              $token = current($this->tokens);
              // _validateUri() removes the token(s) it has consumed from $this->tokens
              $this->parsedQuery['sources'][++$i]['value'] = $this->_validateUri($token, RDQL_SRC_ERR);
              if (substr($token, 0, 1) !== '<')
                  $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      }
      trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
  }
  326.  
  327.  
  /**
   * Parse the WHERE clause of an Rdql query.
   * Each triple pattern "(subject, predicate, object)" is stored in
   * $this->parsedQuery['patterns'][] with the keys 'subject', 'predicate', 'object'.
   * When the parsing of the WHERE clause is finished, this method will call
   * a suitable method to parse the subsequent clause if provided
   * (parseAnd() or parseUsing()).
   *
   * @throws  PhpError
   * @access  private
   */
  function parseWhere() {

      $comma = FALSE;
      $commaExpected = FALSE;
      $i = 0;

      do {
          $this->_clearWhiteSpaces();
          // AND / USING only end the clause once at least one pattern has been read
          if (!strcasecmp('AND', current($this->tokens))
              && !empty($this->parsedQuery['patterns'])) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseAnd();
          } elseif (!strcasecmp('USING', current($this->tokens))
                    && !empty($this->parsedQuery['patterns'])) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseUsing();
          }

          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_WHR_ERR);

          } else {

              if (current($this->tokens) != '(')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
                                ."' - '(' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // the validators remove the tokens they consume; _checkComma() eats the separator
              $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (current($this->tokens) != ')')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // the query ended right after a comma
      if ($comma)
          trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  389.  
  390.  
  /**
   * Parse the AND clause of an Rdql query.
   * The tokens (whitespace included) are glued together into one filter string
   * per comma separated constraint; every finished string is handed to
   * parseFilter(). A USING keyword ends the clause and hands over to parseUsing().
   *
   * @throws  PhpError
   * @access  private
   * @toDo    clear comments
   */
  function parseAnd() {

      $this->_clearWhiteSpaces();
      $filterIndex = 0;
      $buffer = '';

      while (current($this->tokens) != NULL) {
          $pos = key($this->tokens);
          $piece = $this->tokens[$pos];

          // USING closes the last filter and starts the next clause
          if (!strcasecmp('USING', $piece)) {
              $this->parseFilter($filterIndex, $buffer);
              unset($this->tokens[$pos]);
              return $this->parseUsing();
          }

          if ($piece == ',') {
              // a comma closes the current filter; the comma itself is dropped
              $this->parseFilter($filterIndex, $buffer);
              $buffer = '';
              ++$filterIndex;
          } else {
              $buffer .= $piece;
          }
          unset($this->tokens[$pos]);
      }

      // whatever is left forms the last filter
      $this->parseFilter($filterIndex, $buffer);
  }
  /**
   * Parse the USING clause of an Rdql query.
   * Every declaration "prefix FOR <namespace>" is stored in
   * $this->parsedQuery['ns'][prefix] = namespace.
   *
   * @throws  PhpError
   * @access  private
   */
  function parseUsing() {

      $commaExpected = FALSE;
      $comma = FALSE;

      do {
          $this->_clearWhiteSpaces();
          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_USG_ERR);
          } else {
              $prefix = $this->_validatePrefix(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (strcasecmp('FOR', current($this->tokens)))
                  trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // _validateUri() removes the token(s) of the namespace from $this->tokens
              $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // the query ended right after a comma: report it for the USING clause
      if ($comma)
          trigger_error(RDQL_USG_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  458.  
  459.  
  /**
   * Validate one filter of the AND clause and store its parsed form.
   * An empty filter is reported as an unexpected comma; a filter whose number of
   * '(' differs from its number of ')' is reported as well. The result of
   * parseExpressions() is stored in $this->parsedQuery['filters'][$n].
   *
   * @param   integer $n       index of the filter inside the AND clause
   * @param   string  $filter  the filter string
   * @throws  PHPError
   * @access  private
   */
  function parseFilter($n, $filter) {

      if ($filter == NULL)
          trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);

      $opening = substr_count($filter, '(');
      $closing = substr_count($filter, ')');

      if ($opening > $closing)
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - ')' missing ", E_USER_ERROR);
      elseif ($opening < $closing)
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - too many ')' ", E_USER_ERROR);

      $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
  }
  484.  
  485.  
  /**
   * Parse expressions inside the passed filter:
   * 1)  regex equality expressions:   ?var [~~ | =~ | !~ ] REG_EX
   * 2a) string equality expressions:  ?var [eq | ne] "literal"@lang^^dtype.
   * 2b) string equality expressions:  ?var [eq | ne] ?var
   * 2c) string equality expressions:  ?var [eq | ne] <URI>  or  ?var [eq | ne] prefix:local_name
   * 3)  numerical expressions:        e.g. (?var1 - ?var2)*4 >= 20
   *
   * In cases 1-2 each expression of the given filter is parsed into an array of variables
   * and replaced inside the filter string by a place holder
   * (" ##RegEx_N## " or " ##strEqExpr_N## ").
   * The remaining filter string ('evalFilterStr') contains only numerical expressions
   * and place holders.
   *
   * NOTE(review): _validateUri(), used here for literal datatypes, also removes the
   * current element of $this->tokens - confirm that this side effect is intended
   * when a filter contains more than one typed literal.
   *
   * @param   string  $filterStr
   * @return  array   ['string']                  = string (the unmodified filter)
   *                  ['evalFilterStr']           = string
   *                  ['regexEqExprs'][]['var']   = ?VARNAME
   *                                    ['operator'] = (~~ | =~ | !~)
   *                                    ['regex']    = string
   *                  ['strEqExprs'][]['var']     = ?VARNAME
   *                                  ['operator']   = (eq | ne)
   *                                  ['value']      = string
   *                                  ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
   *                                  ['value_lang'] = string   (literals only)
   *                                  ['value_dtype'] = string  (literals only)
   *                                  ['value_dtype_is_qname'] = boolean (only set for QName datatypes)
   *                  ['numExprVars'][]           = ?VARNAME
   * @access  private
   */
  function parseExpressions($filterStr) {

      $parsedFilter['string'] = $filterStr;
      $parsedFilter['regexEqExprs'] = array();
      $parsedFilter['strEqExprs'] = array();
      $parsedFilter['numExprVars'] = array();

      // parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
          $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
          $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];

          $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
      }

      // parse ?var [eq | ne] "literal"@lang^^dtype
      $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
      $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$i]['value'] = trim($eqExprs[3][$i],"'\"");
          $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
          // strip the leading '@' of the language tag and the leading '^^' of the datatype
          $parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4][$i], 1);
          $dtype = substr($eqExprs[5][$i], 2);
          if ($dtype) {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
              if ($dtype[0] != '<')
                  $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;
          } else
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';

          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$i## ", $filterStr);
      }

      // parse ?var [eq | ne] ?var
      // $ii continues the numbering of the string equality expressions
      $ii = count($parsedFilter['strEqExprs']);
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
          $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';

          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      // parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          if ($eqExprs[4][$i]) {
              $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
          } else if ($eqExprs[5][$i]) {
              $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
              $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
          }

          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }
      $parsedFilter['evalFilterStr'] = $filterStr;

      // all that is left are numerical expressions and place holders for the above expressions
      preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
      foreach ($vars[0] as $var) {
          $parsedFilter['numExprVars'][] = $this->_isDefined($var);
      }

      return $parsedFilter;
  }
  596.  
  597.  
  /**
   * Find all query variables used in the WHERE clause.
   * Every variable is listed once, in the order of its first appearance.
   * A pattern without any variable is reported as an error.
   *
   * @return  array [] = ?VARNAME
   * @throws  PHPError
   * @access  private
   */
  function findAllQueryVariables() {

      $vars = array();
      foreach ($this->parsedQuery['patterns'] as $pattern) {
          $count = 0;
          // $pattern holds the entries 'subject', 'predicate' and 'object'
          foreach ($pattern as $v) {
              if ($v['value'] && $v['value'][0] == '?') {
                  ++$count;
                  if (!in_array($v['value'], $vars))
                      $vars[] = $v['value'];
              }
          }
          if (!$count)
              trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
      }

      return $vars;
  }
  622.  
  623.  
  /**
   * Replace all namespace prefixes in the source, pattern and constraint clause of an
   * rdql query with the namespaces declared in the USING clause and default namespaces.
   *
   * Unquoted QNames are resolved by _replaceNamespacePrefix(); inside quoted values
   * every occurrence of "prefix:" is replaced (case-insensitively) by the namespace.
   * The prefix is matched literally, i.e. characters such as '.' or '-' inside a
   * prefix carry no special meaning.
   * The helper entries ('ns', 'is_qname', 'l_dtype_is_qname', 'value_dtype_is_qname')
   * are removed from the parsed query, and the sources are reduced to plain strings.
   *
   * @access  private
   */
  function replaceNamespacePrefixes() {

      global $default_prefixes;

      if (!isset($this->parsedQuery['ns']))
          $this->parsedQuery['ns'] = array();

      // add default namespaces
      // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
      // it will be overridden by the default namespace defined in constants.php
      $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $default_prefixes);

      // replace namespace prefixes in the FROM clause
      if (isset($this->parsedQuery['sources']))
          foreach ($this->parsedQuery['sources'] as $n => $source) {
              if (isset($source['is_qname']))
                  $this->parsedQuery['sources'][$n] = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
              else {
                  foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                      $source['value'] = str_ireplace("$prefix:", $uri, $source['value']);
                  $this->parsedQuery['sources'][$n] = $source['value'];
              }
          }

      // replace namespace prefixes in the where clause
      foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
          foreach ($pattern as $key => $v)
              // variables are left untouched
              if ($v['value'] && $v['value'][0] != '?') {
                  if (isset($v['is_qname'])) {
                      $this->parsedQuery['patterns'][$n][$key]['value']
                          = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
                      unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);
                  } else { // is quoted URI (== <URI>) or Literal
                      if (isset($this->parsedQuery['patterns'][$n][$key]['is_literal'])) {
                          // for literals only the datatype may contain a prefix
                          if (isset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname'])) {
                              $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                                  = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);
                              unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
                          } else {
                              foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                                  $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                                      = str_ireplace("$prefix:", $uri, $this->parsedQuery['patterns'][$n][$key]['l_dtype']);
                          }
                      } else {
                          foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                              $this->parsedQuery['patterns'][$n][$key]['value']
                                  = str_ireplace("$prefix:", $uri, $this->parsedQuery['patterns'][$n][$key]['value']);
                      }
                  }
              }
      }

      // replace prefixes in the constraint clause
      if (isset($this->parsedQuery['filters']))
          foreach ($this->parsedQuery['filters'] as $n => $filter)
              foreach ($filter['strEqExprs'] as $i => $expr) {
                  // $expr is a copy: a QName turned into a URI here is not processed
                  // a second time by the 'URI' branch below
                  if ($expr['value_type'] == 'QName') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';
                  }
                  if ($expr['value_type'] == 'URI')
                      foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                              = str_ireplace("$prefix:", $uri,
                                             $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
                  elseif ($expr['value_type'] == 'Literal') {
                      if (isset($expr['value_dtype_is_qname'])) {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);
                          unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
                      } else {
                          foreach ($this->parsedQuery['ns'] as $prefix => $uri)
                              $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                                  = str_ireplace("$prefix:", $uri,
                                                 $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
                      }
                  }
              }

      unset($this->parsedQuery['ns']);
  }
  711.  
  712.  
  713. // =============================================================================
  714. // *************************** helper functions ********************************
  715. // =============================================================================
  716.  
  717.  
  718.  
  719.  
  720.  
  /**
   * Drop leading whitespace tokens (' ', "\n", "\t", "\r") from $this->tokens,
   * starting at the current token and stopping at the first other token.
   *
   * @access  private
   */
  function _clearWhiteSpaces() {

      $blanks = array(' ', "\n", "\t", "\r");

      while (in_array(current($this->tokens), $blanks))
          unset($this->tokens[key($this->tokens)]);
  }
  735.  
  736.  
  /**
   * Check if the query string of the given clause contains an undesired ','.
   * Leading whitespace is always cleared. A comma found where one is allowed is
   * removed (together with the whitespace behind it); a comma found where none is
   * allowed - including a second comma directly behind an accepted one - raises an error.
   *
   * @param   boolean $commaExpected  whether a comma is allowed at the current position
   * @param   string  $clause_error   error prefix of the clause being parsed
   * @throws  PHPError
   * @access  private
   */
  function _checkComma($commaExpected, $clause_error) {

      $allowed = $commaExpected;

      while (TRUE) {
          $this->_clearWhiteSpaces();

          if (current($this->tokens) != ',')
              return;

          if (!$allowed) {
              trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
              return;
          }

          // consume the comma; another one right behind it is not acceptable
          unset($this->tokens[key($this->tokens)]);
          $allowed = FALSE;
      }
  }
  758.  
  /**
   * Check if the given token is either a variable (?var) or the first token of an URI
   * (<URI>) or a QName.
   * In case of an URI this function returns the whole URI string.
   * Tokens that are neither a variable nor written as <URI> are flagged as QName.
   *
   * @param   string  $token
   * @return  array ['value'] = string
   *                ['is_qname'] = boolean (only set for QNames)
   * @throws  PHPError
   * @access  private
   */
  function _validateVarUri($token) {
      // substr() instead of an offset: the token may be empty at the end of the query
      $first = substr($token, 0, 1);
      if ($first === '?') {
          $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } else {
          $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          if ($first !== '<')
              $token_res['is_qname'] = TRUE;
      }
      return $token_res;
  }
  778.  
  779.  
  /**
   * Check if the given token is either a variable (?var) or the first token
   * of either an URI (<URI>), a QName or a literal ("Literal").
   * In case of a literal return an array with literal properties (value, language, datatype).
   * In case of a variable or an URI return only ['value'] = string.
   * If the token is none of the above an error is raised and NULL is returned.
   *
   * @param   string  $token
   * @return  array ['value'] = string
   *                ['is_qname'] = boolean (only set for QNames)
   *                ['is_literal'] = boolean
   *                ['l_lang'] = string
   *                ['l_dtype'] = string
   * @throws  PHPError
   * @access  private
   */
  function _validateVarUriLiteral($token) {
      $statement_object = NULL;
      // substr() instead of an offset: the token may be empty at the end of the query
      $first = substr($token, 0, 1);

      if ($first === '?')
          $statement_object = array('value' => $this->_validateVar($token, RDQL_WHR_ERR));
      elseif ($first === "'" || $first === '"')
          $statement_object = $this->_validateLiteral($token);
      elseif ($first === '<')
          $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR));
      elseif (strpos($token, ':') !== FALSE) {
          // anything else containing a colon is treated as QName
          $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR),
                                    'is_qname' => TRUE);
      } else
          trigger_error(RDQL_WHR_ERR ." '$token' - ?Variable, &lt;URI&gt;, QName, or \"LITERAL\" expected", E_USER_ERROR);
      return $statement_object;
  }
  809.  
  /**
   * Check if the given token is a valid variable name, i.e. consists of a '?'
   * followed by letters, digits or underscores only.
   * The current token is removed from $this->tokens afterwards.
   *
   * @param   string  $token
   * @param   string  $clause_error  error prefix of the clause being parsed
   * @return  string  the unchanged token
   * @throws  PHPError
   * @access  private
   */
  function _validateVar($token, $clause_error) {

      $found = array();
      preg_match("/\?[a-zA-Z0-9_]+/", $token, $found);

      // valid only if the first match covers the complete token
      $isValid = isset($found[0]) && $found[0] == $token;
      if (!$isValid)
          trigger_error($clause_error ."'" .htmlspecialchars($token)
                        ."' - variable name contains illegal characters", E_USER_ERROR);

      unset($this->tokens[key($this->tokens)]);
      return $token;
  }
  828.  
  829.  
  /**
   * Check if $token is the first token of a valid URI (<URI>) or a QName
   * and return the whole URI string (without '<' and '>') or the QName.
   * All tokens belonging to the URI are removed from $this->tokens.
   *
   * A URI may be spread over several tokens; they are collected until a token
   * ending with '>' is found. Parentheses, commas and whitespace tokens are not
   * allowed inside a URI.
   *
   * @param   string  $token
   * @param   string  $clause_error  error prefix of the clause being parsed
   * @return  string
   * @throws  PHPError
   * @access  private
   */
  function _validateUri($token, $clause_error) {

      // substr() instead of an offset: the token may be empty at the end of the query
      if (substr($token, 0, 1) != '<') {
          // a prefix is required: a colon at position 0 does not count
          if (strpos($token, ':') && $this->_validateQName($token, $clause_error)) {
              unset($this->tokens[key($this->tokens)]);
              return rtrim($token, ':');
          }
          $errmsg = $clause_error .'\'' .htmlspecialchars($token) .'\' ';
          if ($clause_error == RDQL_WHR_ERR)
              $errmsg .= "- ?Variable or &lt;URI&gt; or QName expected";
          else
              $errmsg .= "- &lt;URI&gt; or QName expected";
          trigger_error($errmsg, E_USER_ERROR);
      } else {
          $token_res = $token;
          // collect tokens until the closing '>' or the end of the token list
          while ($token != NULL && substr($token, -1) != '>') {
              if ($token == '(' || $token == ')' || $token == ',' ||
                  $token == ' ' || $token == "\n" || $token == "\r" || $token == "\t") {
                  trigger_error($clause_error .'\'' .htmlspecialchars($token_res)
                                ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $token = current($this->tokens);
              $token_res .= $token;
          }
          if ($token == NULL)
              trigger_error($clause_error .'\'' .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
          unset($this->tokens[key($this->tokens)]);
          return trim($token_res, '<>');
      }
  }
  870.  
  871.  
  /**
   * Check if $token is the first token of a valid literal ("LITERAL") and
   * return an array with literal properties (value, language, datatype).
   *
   * The first character of $token is taken as the quotation mark. The tokens
   * in $this->tokens are then appended to the literal value (and removed from
   * the list) until one of them closes the literal: either by ending with the
   * quotation mark, or by containing the quotation mark followed by
   * "@language", "@language^^datatype" or "^^datatype".
   *
   * @param   string  $token
   * @return  array   ['value']            = string
   *                  ['is_literal']       = boolean
   *                  ['l_lang']           = string
   *                  ['l_dtype']          = string
   *                  ['l_dtype_is_qname'] = boolean (only set, to TRUE, when
   *                                         the datatype does not start with '<')
   * @throws  PHPError
   * @access  private
   */
  function _validateLiteral($token) {

    // substr() is used instead of the curly-brace offset syntax ($token{0}),
    // which is no longer accepted by PHP 8. The cast keeps the mark a string
    // even where substr() returns FALSE for an empty token.
    $quotation_mark = (string) substr($token, 0, 1);
    $statement_object = array ('value' => '',
                               'is_literal' => TRUE,
                               'l_lang' => '',
                               'l_dtype' => '');
    // Strip the opening quotation mark from the current token.
    $this->tokens[key($this->tokens)] = substr($token,1);

    $return = FALSE;
    foreach ($this->tokens as $k => $token) {

      if ($token != NULL && substr($token, -1) == $quotation_mark) {
        $token = rtrim($token, $quotation_mark);
        $return = TRUE;

      // parse @language(^^datatype)?
      }elseif (strpos($token, $quotation_mark .'@') || substr($token, 0, 2) == $quotation_mark .'@') {
        $lang = substr($token, strpos($token, $quotation_mark .'@')+2);
        if (strpos($lang, '^^') || substr($lang, 0,2) == '^^') {
          $dtype = substr($lang, strpos($lang, '^^')+2);
          if (!$dtype)
            trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                          .$token ." - datatype expected" ,E_USER_ERROR);
          $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
          if (substr($dtype, 0, 1) != '<')
            $statement_object['l_dtype_is_qname'] = TRUE;
          $lang = substr($lang, 0, strpos($lang, '^^'));
        }
        if (!$lang)
          trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                        .$token ." - language expected" ,E_USER_ERROR);
        $statement_object['l_lang'] = $lang;
        $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
        $return = TRUE;

      // parse ^^datatype
      }elseif (strpos($token, $quotation_mark .'^^') || substr($token, 0, 3) == $quotation_mark .'^^') {
        $dtype = substr($token, strpos($token, $quotation_mark .'^^')+3);
        if (!$dtype)
          trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                        .$token ." - datatype expected" ,E_USER_ERROR);

        $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
        if (substr($dtype, 0, 1) != '<')
          $statement_object['l_dtype_is_qname'] = TRUE;

        $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
        $return = TRUE;
      }elseif (strpos($token, $quotation_mark))
        // a quotation mark inside a token that does not close the literal
        trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);
      $statement_object['value'] .= $token;
      unset($this->tokens[$k]);
      if ($return)
        return $statement_object;
    }
    // All tokens were consumed without finding the closing quotation mark.
    trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
  }
  943.  
  /**
   * Check if the given token is a valid QName (prefix:local_name).
   *
   * The token is split at ':'; more than two parts are rejected, the prefix
   * must be a valid NCName, and the local part - when it is not empty - must
   * be a valid NCName too.
   *
   * @param   string  $token
   * @param   string  $clause_error  text prepended to the error message
   * @return  boolean TRUE (failures are reported through trigger_error)
   * @throws  PHPError
   * @access  private
   */
  function _validateQName($token, $clause_error) {

    $parts = explode(':', $token);
    // Escaped like in the other validators, since the token is user input.
    $quoted = "'" .htmlspecialchars($token) ."'";

    if (count($parts) > 2)
      trigger_error($clause_error ."illegal QName: " .$quoted, E_USER_ERROR);
    if (!$this->_validateNCName($parts[0]))
      trigger_error($clause_error ."illegal prefix in QName: " .$quoted, E_USER_ERROR);

    // A token without ':' has no second part; an empty local part is allowed.
    // The comparison is strict so that a local part of "0" is still validated
    // instead of being skipped as a falsy value.
    $local_part = isset($parts[1]) ? $parts[1] : '';
    if ($local_part !== '' && !$this->_validateNCName($local_part))
      trigger_error($clause_error ."illegal local part in QName: " .$quoted, E_USER_ERROR);
    return TRUE;
  }
  963.  
  964.  
  /**
   * Tell whether the given token is a valid NCName.
   *
   * The name has to start with a letter or an underscore and may continue
   * with letters, digits, underscores, dots and hyphens.
   *
   * @param   string  $token
   * @return  boolean TRUE when the whole token matches, FALSE otherwise
   * @access  private
   */
  function _validateNCName($token) {

    $hits = array();
    preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $hits);

    // The pattern is not anchored: the token is valid only if the match
    // covers it completely.
    return (isset($hits[0]) && $hits[0] == $token);
  }
  978.  
  979.  
  /**
   * Validate a namespace prefix and consume it from the token list.
   *
   * The prefix has to pass _validateNCName(). Afterwards the entry at the
   * current position of $this->tokens is removed.
   *
   * @param   string  $token
   * @return  string  the token, unchanged
   * @throws  PHPError
   * @access  private
   */
  function _validatePrefix($token) {

    $isNCName = $this->_validateNCName($token);
    if (!$isNCName) {
      $quoted = "'" .htmlspecialchars($token);
      trigger_error(RDQL_USG_ERR .$quoted
                    ."' - illegal input, namespace prefix expected", E_USER_ERROR);
    }

    $currentKey = key($this->tokens);
    unset($this->tokens[$currentKey]);
    return $token;
  }
  996.  
  /**
   * Replace a prefix in a given QName and return a full URI.
   *
   * The prefix (the part before the first ':') is looked up in
   * $this->parsedQuery['ns'] and the namespace found there is prepended to
   * the local name (the part after the first ':').
   *
   * @param   string  $qName
   * @param   string  $clause_error  text prepended to the error message
   * @return  string
   * @throws  PHPError
   * @access  private
   */
  function _replaceNamespacePrefix($qName, $clause_error) {

    $qName_parts = explode(':', $qName);
    $prefix = $qName_parts[0];
    // A QName without ':' has no local part; avoid reading an undefined offset.
    $local_name = isset($qName_parts[1]) ? $qName_parts[1] : '';

    // The namespace table may be missing altogether, so it is checked before
    // the prefix is looked up in it.
    $prefix_known = isset($this->parsedQuery['ns'])
                 && is_array($this->parsedQuery['ns'])
                 && array_key_exists($prefix, $this->parsedQuery['ns']);
    if (!$prefix_known)
      trigger_error($clause_error .'undefined prefix: \'' .$prefix .'\' in: \'' .$qName .'\'', E_USER_ERROR);

    // If an error handler lets execution continue, only the local name is
    // returned for an unknown prefix.
    $namespace = $prefix_known ? $this->parsedQuery['ns'][$prefix] : '';
    return $namespace .$local_name;
  }
  /**
   * Run _isDefined() on every variable listed in
   * $this->parsedQuery['selectVars'].
   *
   * @access  private
   */
  function _checkSelectVars() {

    $selectVars = $this->parsedQuery['selectVars'];
    foreach ($selectVars as $selectVar) {
      $this->_isDefined($selectVar);
    }
  }
  1023.  
  1024.  
  /**
   * Check that the given variable is among the variables returned by
   * findAllQueryVariables(); if it is not, report that it must be defined
   * in the WHERE clause.
   *
   * @param   string  $var
   * @return  string  the variable, unchanged
   * @throws  PHPError
   * @access  private
   */
  function _isDefined($var) {

    $isKnown = in_array($var, $this->findAllQueryVariables());
    if (!$isKnown) {
      trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
    }
    return $var;
  }
  1041.  
  1042.  
  /**
   * Report an error if the regular expression from the AND clause is not
   * quoted, or if its opening and closing quotation marks differ.
   *
   * Both checks are always evaluated, one after the other.
   *
   * @param   string  $filterString  filter text shown in the error message
   * @param   string  $lQuotMark     quotation mark found on the left side
   * @param   string  $rQuotMark     quotation mark found on the right side
   * @throws  PHPError
   * @access  private
   */
  function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {

    $isQuoted = (bool) $lQuotMark;
    if (!$isQuoted) {
      trigger_error(RDQL_AND_ERR ."'" .$filterString ."' - regular expressions must be quoted", E_USER_ERROR);
    }

    $marksDiffer = ($lQuotMark != $rQuotMark);
    if ($marksDiffer) {
      trigger_error(RDQL_AND_ERR ."'" .$filterString ."' - quotation end mark in the regular expression missing", E_USER_ERROR);
    }
  }
  1060.  
  1061. } // end: Class RdqlParser
  1062.  
  1063. ?>

Documentation generated on Fri, 17 Dec 2004 16:17:42 +0100 by phpDocumentor 1.3.0RC3