• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • API Reference
  • Sitemap
  • Contact Us
 

NepomukDaemons

queryparser.cpp

Go to the documentation of this file.
00001 /*
00002    This file is part of the Nepomuk KDE project.
00003    Copyright (C) 2007 Sebastian Trueg <trueg@kde.org>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License version 2 as published by the Free Software Foundation.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017    Boston, MA 02110-1301, USA.
00018  */
00019 
00020 #include "queryparser.h"
00021 #include "query.h"
00022 #include "term.h"
00023 
00024 #include <QtCore/QRegExp>
00025 #include <QtCore/QSet>
00026 
00027 #include <KDebug>
00028 #include <KLocale>
00029 
00030 
00031 /* Advanced queries:
00032  * select distinct ?r ?p ?x ?label ?comment where { { ?r ?p ?x . } UNION { ?r ?p ?r2 . ?r2 ?p2 ?x . } . FILTER(isLiteral(?x)) . FILTER REGEX(STR(?p),'hastag','i') . FILTER REGEX(STR(?x),'nepomuk','i') . OPTIONAL { { ?r <http://www.w3.org/2000/01/rdf-schema#label> ?label } UNION { ?r <http://www.semanticdesktop.org/ontologies/2007/08/15/nao#prefLabel> ?label . } UNION { ?r <http://freedesktop.org/standards/xesam/1.0/core#name> ?label . } . ?r <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . } . }
00033  */
00034 
00035 namespace {
00036     // a field differs from a plain term in that it does never allow comparators
00037     QString s_fieldNamePattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"':=<>]+\\%1))" );
00038     QString s_plainTermPattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"']+\\%1))" );
00039     QString s_inExclusionPattern( "([\\+\\-]?)" );
00040     QString s_uriPattern( "<([^<>]+)>" );
00041     QString s_comparatorPattern( "(:|\\<=|\\>=|=|\\<|\\>)" );
00042 
00043     // match a simple search text
00044     // captures: 1 - The optional + or - sign (may be empty)
00045     //           2 - the search text (including optional paranthesis)
00046     QRegExp s_plainTermRx( s_inExclusionPattern + s_plainTermPattern.arg( 3 ) );
00047 
00048     // match a field search term: fieldname + relation (:, =, etc) + search text with optional paranthesis
00049     // captures: 1 - The optional + or - sign (may be empty)
00050     //           2 - fieldname
00051     //           3 - relation
00052     //           4 - search text (including optional paranthesis)
00053     QRegExp s_fieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + s_plainTermPattern.arg( 6 ) );
00054 
00055     // match a property URI search term: property URI + relation (:, =, etc) + search text with optional paranthesis
00056     // captures: 1 - The optional + or - sign (may be empty)
00057     //           2 - property URI
00058     //           3 - relation
00059     //           4 - search text (including optional paranthesis)
00060     QRegExp s_propertyRx( s_inExclusionPattern + s_uriPattern + s_comparatorPattern + s_plainTermPattern.arg( 5 ) );
00061 
00062     // match a property URI search term: property URI + relation (:, =, etc) + resource URI
00063     // captures: 1 - The optional + or - sign (may be empty)
00064     //           2 - property URI
00065     //           3 - resource URI
00066     QRegExp s_resourceRx( s_inExclusionPattern + s_uriPattern + "(?::|=)" + s_uriPattern );
00067 
00068     QRegExp s_fieldFieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + "\\(" +  s_fieldNamePattern.arg( 6 ) + s_comparatorPattern + s_plainTermPattern.arg( 9 ) + "\\)" );
00069 
00070     Nepomuk::Search::Term::Comparator fieldTypeRelationFromString( const QString& s ) {
00071         if ( s == "=" ) {
00072             return Nepomuk::Search::Term::Equal;
00073         }
00074         else if ( s == ":" ) {
00075             return Nepomuk::Search::Term::Contains;
00076         }
00077         else if ( s == ">" ) {
00078             return Nepomuk::Search::Term::Greater;
00079         }
00080         else if ( s == "<" ) {
00081             return Nepomuk::Search::Term::Smaller;
00082         }
00083         else if ( s == ">=" ) {
00084             return Nepomuk::Search::Term::GreaterOrEqual;
00085         }
00086         else if ( s == "<=" ) {
00087             return Nepomuk::Search::Term::SmallerOrEqual;
00088         }
00089         else {
00090             kDebug() << "FIXME: Unsupported relation:" << s;
00091             return Nepomuk::Search::Term::Equal;
00092         }
00093     }
00094 
00095     QString stripQuotes( const QString& s ) {
00096         if ( s[0] == '\'' ||
00097              s[0] == '\"' ) {
00098             return s.mid( 1 ).left( s.length()-2 );
00099         }
00100         else {
00101             return s;
00102         }
00103     }
00104 
00105     QUrl tryToBeIntelligentAboutParsingUrl( const QString& s ) {
00106         if ( s.contains( '%' ) && !s.contains( '/' ) ) {
00107             return QUrl::fromEncoded( s.toAscii() );
00108         }
00109         else {
00110             return QUrl( s );
00111         }
00112     }
00113 
00114     Soprano::LiteralValue createLiteral( const QString& s ) {
00115         bool b = false;
00116         int i = s.toInt( &b );
00117         if ( b )
00118             return Soprano::LiteralValue( i );
00119         double d = s.toDouble( &b );
00120         if ( b )
00121             return Soprano::LiteralValue( d );
00122         return s;
00123     }
00124 }
00125 
00126 
00127 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parseQuery( const QString& query )
00128 {
00129     QueryParser parser;
00130     return parser.parse( query );
00131 }
00132 
00133 
00134 class Nepomuk::Search::QueryParser::Private
00135 {
00136 public:
00137     QSet<QString> andKeywords;
00138     QSet<QString> orKeywords;
00139 };
00140 
00141 
00142 Nepomuk::Search::QueryParser::QueryParser()
00143     : d( new Private() )
00144 {
00145     QString andListStr = i18nc( "Boolean AND keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the OR keyword.", "and" );
00146     foreach ( const QString &andKeyword, andListStr.split( " ", QString::SkipEmptyParts ) ) {
00147         d->andKeywords.insert( andKeyword.toLower() );
00148     }
00149     QString orListStr = i18nc( "Boolean OR keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the AND keyword.", "or" );
00150     foreach ( const QString &orKeyword, orListStr.split( " ", QString::SkipEmptyParts ) ) {
00151         d->orKeywords.insert( orKeyword.toLower() );
00152     }
00153 }
00154 
00155 
00156 Nepomuk::Search::QueryParser::~QueryParser()
00157 {
00158     delete d;
00159 }
00160 
00161 
00162 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parse( const QString& query )
00163 {
00164     // TODO: a "real" parser which can handle all of the Xesam user language
00165     //       This one for example does not handle nesting at all.
00166 
00167     QList<Term> terms;
00168 
00169     bool inOrBlock = false;
00170     bool inAndBlock = false;
00171 
00172     int pos = 0;
00173     while ( pos < query.length() ) {
00174         // skip whitespace
00175         while ( pos < query.length() && query[pos].isSpace() ) {
00176             kDebug() << "Skipping space at" << pos;
00177             ++pos;
00178         }
00179 
00180         Term term;
00181 
00182         if ( pos < query.length() ) {
00183             if ( s_resourceRx.indexIn( query, pos ) == pos ) {
00184                 // FIXME: honour the +-
00185                 kDebug() << "matched resource term at" << pos << s_resourceRx.cap( 0 );
00186                 term = Term( tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 2 ) ),
00187                              tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 3 ) ) );
00188                 pos += s_resourceRx.matchedLength();
00189             }
00190             else if ( s_propertyRx.indexIn( query, pos ) == pos ) {
00191                 // FIXME: honour the +-
00192                 kDebug() << "matched property term at" << pos << s_propertyRx.cap( 0 );
00193                 term.setProperty( tryToBeIntelligentAboutParsingUrl( s_propertyRx.cap( 2 ) ) );
00194                 term.addSubTerm( Term( createLiteral( stripQuotes( s_propertyRx.cap( 4 ) ) ) ) );
00195                 QString comparator = s_propertyRx.cap( 3 );
00196                 term.setType( Term::ComparisonTerm );
00197                 term.setComparator( fieldTypeRelationFromString( comparator ) );
00198                 pos += s_propertyRx.matchedLength();
00199             }
00200             else if ( s_fieldFieldRx.indexIn( query, pos ) == pos ) {
00201                 kDebug() << "matched field field term at" << pos
00202                          << s_fieldFieldRx.cap( 0 )
00203                          << s_fieldFieldRx.cap( 2 )
00204                          << s_fieldFieldRx.cap( 4 )
00205                          << s_fieldFieldRx.cap( 5 )
00206                          << s_fieldFieldRx.cap( 7 )
00207                          << s_fieldFieldRx.cap( 8 );
00208                 term.setField( stripQuotes( s_fieldFieldRx.cap( 2 ) ) );
00209                 QString comparator = s_fieldFieldRx.cap( 4 );
00210                 term.setType( Term::ComparisonTerm );
00211                 term.setComparator( fieldTypeRelationFromString( comparator ) );
00212                 term.addSubTerm( Term( stripQuotes( s_fieldFieldRx.cap( 5 ) ), s_fieldFieldRx.cap( 8 ), fieldTypeRelationFromString( s_fieldFieldRx.cap( 7 ) ) ) );
00213                 pos += s_fieldFieldRx.matchedLength();
00214             }
00215             else if ( s_fieldRx.indexIn( query, pos ) == pos ) {
00216                 // FIXME: honour the +-
00217                 kDebug() << "matched field term at" << pos << s_fieldRx.cap( 0 ) << s_fieldRx.cap( 2 ) << s_fieldRx.cap( 4 ) << s_fieldRx.cap( 5 );
00218                 term.setField( stripQuotes( s_fieldRx.cap( 2 ) ) );
00219                 term.addSubTerm( Term( createLiteral( stripQuotes( s_fieldRx.cap( 5 ) ) ) ) );
00220                 QString comparator = s_fieldRx.cap( 4 );
00221                 term.setType( Term::ComparisonTerm );
00222                 term.setComparator( fieldTypeRelationFromString( comparator ) );
00223                 pos += s_fieldRx.matchedLength();
00224             }
00225             else if ( s_plainTermRx.indexIn( query, pos ) == pos ) {
00226                 // FIXME: honour the +-
00227                 QString value = stripQuotes( s_plainTermRx.cap( 2 ) );
00228                 if ( d->orKeywords.contains( value.toLower() ) ) {
00229                     inOrBlock = true;
00230                 }
00231                 else if ( d->andKeywords.contains( value.toLower() ) ) {
00232                     inAndBlock = true;
00233                 }
00234                 else {
00235                     kDebug() << "matched literal at" << pos << value;
00236                     term = Term( Soprano::LiteralValue( value ) );
00237                 }
00238                 pos += s_plainTermRx.matchedLength();
00239             }
00240             else {
00241                 kDebug() << "Invalid query at" << pos << query;
00242                 return Term();
00243             }
00244 
00245             if ( term.isValid() ) {
00246                 if ( inOrBlock && !terms.isEmpty() ) {
00247                     Term orTerm;
00248                     orTerm.setType( Term::OrTerm );
00249                     orTerm.addSubTerm( terms.takeLast() );
00250                     orTerm.addSubTerm( term );
00251                     terms.append( orTerm );
00252                 }
00253                 else if ( inAndBlock && !terms.isEmpty() ) {
00254                     Term andTerm;
00255                     andTerm.setType( Term::AndTerm );
00256                     andTerm.addSubTerm( terms.takeLast() );
00257                     andTerm.addSubTerm( term );
00258                     terms.append( andTerm );
00259                 }
00260                 else {
00261                     terms.append( term );
00262                 }
00263             }
00264         }
00265     }
00266 
00267     if ( terms.count() == 1 ) {
00268         return terms[0];
00269     }
00270     else if ( terms.count() > 0 ) {
00271         Term t;
00272         t.setType( Term::AndTerm );
00273         t.setSubTerms( terms );
00274         return t;
00275     }
00276     else {
00277         return Term();
00278     }
00279 }

NepomukDaemons

Skip menu "NepomukDaemons"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

API Reference

Skip menu "API Reference"
  • KCMShell
  • KNotify
  • KStyles
  • Nepomuk Daemons
Generated for API Reference by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal