NepomukDaemons
queryparser.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "queryparser.h"
00021 #include "query.h"
00022 #include "term.h"
00023
00024 #include <QtCore/QRegExp>
00025 #include <QtCore/QSet>
00026
00027 #include <KDebug>
00028 #include <KLocale>
00029
00030
00031
00032
00033
00034
00035 namespace {
00036
00037 QString s_fieldNamePattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"':=<>]+\\%1))" );
00038 QString s_plainTermPattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"']+\\%1))" );
00039 QString s_inExclusionPattern( "([\\+\\-]?)" );
00040 QString s_uriPattern( "<([^<>]+)>" );
00041 QString s_comparatorPattern( "(:|\\<=|\\>=|=|\\<|\\>)" );
00042
00043
00044
00045
00046 QRegExp s_plainTermRx( s_inExclusionPattern + s_plainTermPattern.arg( 3 ) );
00047
00048
00049
00050
00051
00052
00053 QRegExp s_fieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + s_plainTermPattern.arg( 6 ) );
00054
00055
00056
00057
00058
00059
00060 QRegExp s_propertyRx( s_inExclusionPattern + s_uriPattern + s_comparatorPattern + s_plainTermPattern.arg( 5 ) );
00061
00062
00063
00064
00065
00066 QRegExp s_resourceRx( s_inExclusionPattern + s_uriPattern + "(?::|=)" + s_uriPattern );
00067
00068 QRegExp s_fieldFieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + "\\(" + s_fieldNamePattern.arg( 6 ) + s_comparatorPattern + s_plainTermPattern.arg( 9 ) + "\\)" );
00069
00070 Nepomuk::Search::Term::Comparator fieldTypeRelationFromString( const QString& s ) {
00071 if ( s == "=" ) {
00072 return Nepomuk::Search::Term::Equal;
00073 }
00074 else if ( s == ":" ) {
00075 return Nepomuk::Search::Term::Contains;
00076 }
00077 else if ( s == ">" ) {
00078 return Nepomuk::Search::Term::Greater;
00079 }
00080 else if ( s == "<" ) {
00081 return Nepomuk::Search::Term::Smaller;
00082 }
00083 else if ( s == ">=" ) {
00084 return Nepomuk::Search::Term::GreaterOrEqual;
00085 }
00086 else if ( s == "<=" ) {
00087 return Nepomuk::Search::Term::SmallerOrEqual;
00088 }
00089 else {
00090 kDebug() << "FIXME: Unsupported relation:" << s;
00091 return Nepomuk::Search::Term::Equal;
00092 }
00093 }
00094
00095 QString stripQuotes( const QString& s ) {
00096 if ( s[0] == '\'' ||
00097 s[0] == '\"' ) {
00098 return s.mid( 1 ).left( s.length()-2 );
00099 }
00100 else {
00101 return s;
00102 }
00103 }
00104
00105 QUrl tryToBeIntelligentAboutParsingUrl( const QString& s ) {
00106 if ( s.contains( '%' ) && !s.contains( '/' ) ) {
00107 return QUrl::fromEncoded( s.toAscii() );
00108 }
00109 else {
00110 return QUrl( s );
00111 }
00112 }
00113
00114 Soprano::LiteralValue createLiteral( const QString& s ) {
00115 bool b = false;
00116 int i = s.toInt( &b );
00117 if ( b )
00118 return Soprano::LiteralValue( i );
00119 double d = s.toDouble( &b );
00120 if ( b )
00121 return Soprano::LiteralValue( d );
00122 return s;
00123 }
00124 }
00125
00126
00127 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parseQuery( const QString& query )
00128 {
00129 QueryParser parser;
00130 return parser.parse( query );
00131 }
00132
00133
00134 class Nepomuk::Search::QueryParser::Private
00135 {
00136 public:
00137 QSet<QString> andKeywords;
00138 QSet<QString> orKeywords;
00139 };
00140
00141
00142 Nepomuk::Search::QueryParser::QueryParser()
00143 : d( new Private() )
00144 {
00145 QString andListStr = i18nc( "Boolean AND keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the OR keyword.", "and" );
00146 foreach ( const QString &andKeyword, andListStr.split( " ", QString::SkipEmptyParts ) ) {
00147 d->andKeywords.insert( andKeyword.toLower() );
00148 }
00149 QString orListStr = i18nc( "Boolean OR keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the AND keyword.", "or" );
00150 foreach ( const QString &orKeyword, orListStr.split( " ", QString::SkipEmptyParts ) ) {
00151 d->orKeywords.insert( orKeyword.toLower() );
00152 }
00153 }
00154
00155
00156 Nepomuk::Search::QueryParser::~QueryParser()
00157 {
00158 delete d;
00159 }
00160
00161
00162 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parse( const QString& query )
00163 {
00164
00165
00166
00167 QList<Term> terms;
00168
00169 bool inOrBlock = false;
00170 bool inAndBlock = false;
00171
00172 int pos = 0;
00173 while ( pos < query.length() ) {
00174
00175 while ( pos < query.length() && query[pos].isSpace() ) {
00176 kDebug() << "Skipping space at" << pos;
00177 ++pos;
00178 }
00179
00180 Term term;
00181
00182 if ( pos < query.length() ) {
00183 if ( s_resourceRx.indexIn( query, pos ) == pos ) {
00184
00185 kDebug() << "matched resource term at" << pos << s_resourceRx.cap( 0 );
00186 term = Term( tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 2 ) ),
00187 tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 3 ) ) );
00188 pos += s_resourceRx.matchedLength();
00189 }
00190 else if ( s_propertyRx.indexIn( query, pos ) == pos ) {
00191
00192 kDebug() << "matched property term at" << pos << s_propertyRx.cap( 0 );
00193 term.setProperty( tryToBeIntelligentAboutParsingUrl( s_propertyRx.cap( 2 ) ) );
00194 term.addSubTerm( Term( createLiteral( stripQuotes( s_propertyRx.cap( 4 ) ) ) ) );
00195 QString comparator = s_propertyRx.cap( 3 );
00196 term.setType( Term::ComparisonTerm );
00197 term.setComparator( fieldTypeRelationFromString( comparator ) );
00198 pos += s_propertyRx.matchedLength();
00199 }
00200 else if ( s_fieldFieldRx.indexIn( query, pos ) == pos ) {
00201 kDebug() << "matched field field term at" << pos
00202 << s_fieldFieldRx.cap( 0 )
00203 << s_fieldFieldRx.cap( 2 )
00204 << s_fieldFieldRx.cap( 4 )
00205 << s_fieldFieldRx.cap( 5 )
00206 << s_fieldFieldRx.cap( 7 )
00207 << s_fieldFieldRx.cap( 8 );
00208 term.setField( stripQuotes( s_fieldFieldRx.cap( 2 ) ) );
00209 QString comparator = s_fieldFieldRx.cap( 4 );
00210 term.setType( Term::ComparisonTerm );
00211 term.setComparator( fieldTypeRelationFromString( comparator ) );
00212 term.addSubTerm( Term( stripQuotes( s_fieldFieldRx.cap( 5 ) ), s_fieldFieldRx.cap( 8 ), fieldTypeRelationFromString( s_fieldFieldRx.cap( 7 ) ) ) );
00213 pos += s_fieldFieldRx.matchedLength();
00214 }
00215 else if ( s_fieldRx.indexIn( query, pos ) == pos ) {
00216
00217 kDebug() << "matched field term at" << pos << s_fieldRx.cap( 0 ) << s_fieldRx.cap( 2 ) << s_fieldRx.cap( 4 ) << s_fieldRx.cap( 5 );
00218 term.setField( stripQuotes( s_fieldRx.cap( 2 ) ) );
00219 term.addSubTerm( Term( createLiteral( stripQuotes( s_fieldRx.cap( 5 ) ) ) ) );
00220 QString comparator = s_fieldRx.cap( 4 );
00221 term.setType( Term::ComparisonTerm );
00222 term.setComparator( fieldTypeRelationFromString( comparator ) );
00223 pos += s_fieldRx.matchedLength();
00224 }
00225 else if ( s_plainTermRx.indexIn( query, pos ) == pos ) {
00226
00227 QString value = stripQuotes( s_plainTermRx.cap( 2 ) );
00228 if ( d->orKeywords.contains( value.toLower() ) ) {
00229 inOrBlock = true;
00230 }
00231 else if ( d->andKeywords.contains( value.toLower() ) ) {
00232 inAndBlock = true;
00233 }
00234 else {
00235 kDebug() << "matched literal at" << pos << value;
00236 term = Term( Soprano::LiteralValue( value ) );
00237 }
00238 pos += s_plainTermRx.matchedLength();
00239 }
00240 else {
00241 kDebug() << "Invalid query at" << pos << query;
00242 return Term();
00243 }
00244
00245 if ( term.isValid() ) {
00246 if ( inOrBlock && !terms.isEmpty() ) {
00247 Term orTerm;
00248 orTerm.setType( Term::OrTerm );
00249 orTerm.addSubTerm( terms.takeLast() );
00250 orTerm.addSubTerm( term );
00251 terms.append( orTerm );
00252 }
00253 else if ( inAndBlock && !terms.isEmpty() ) {
00254 Term andTerm;
00255 andTerm.setType( Term::AndTerm );
00256 andTerm.addSubTerm( terms.takeLast() );
00257 andTerm.addSubTerm( term );
00258 terms.append( andTerm );
00259 }
00260 else {
00261 terms.append( term );
00262 }
00263 }
00264 }
00265 }
00266
00267 if ( terms.count() == 1 ) {
00268 return terms[0];
00269 }
00270 else if ( terms.count() > 0 ) {
00271 Term t;
00272 t.setType( Term::AndTerm );
00273 t.setSubTerms( terms );
00274 return t;
00275 }
00276 else {
00277 return Term();
00278 }
00279 }