00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "searchthread.h"
00021 #include "term.h"
00022 #include "qurlhash.h"
00023
00024 #include <Nepomuk/ResourceManager>
00025 #include <Nepomuk/Resource>
00026 #include <Nepomuk/Types/Property>
00027 #include <Nepomuk/Types/Class>
00028 #include <Nepomuk/Types/Literal>
00029
00030 #include <Soprano/Version>
00031 #include <Soprano/Model>
00032 #include <Soprano/QueryResultIterator>
00033 #include <Soprano/Node>
00034 #include <Soprano/Statement>
00035 #include <Soprano/LiteralValue>
00036 #include <Soprano/StatementIterator>
00037 #include <Soprano/Vocabulary/RDF>
00038 #include <Soprano/Vocabulary/RDFS>
00039 #include <Soprano/Vocabulary/NRL>
00040 #include <Soprano/Vocabulary/NAO>
00041 #include <Soprano/Vocabulary/XMLSchema>
00042 #include <Soprano/Vocabulary/OWL>
00043
00044 #include <KDebug>
00045
00046 #include <QtCore/QTime>
00047
00048
00049
00050
00051
00052
00053 #warning Make query optimization methods return an invalid term if the query cannot be resolved and handle this as no results
00054
00055 using namespace Soprano;
00056
00057 namespace {
00062 const int MAX_RESOURCES = 4;
00063
00064
00065 void mergeInResult( QHash<QUrl, Nepomuk::Search::Result>& results, const Nepomuk::Search::Result& resource ) {
00066 QHash<QUrl, Nepomuk::Search::Result>::iterator old = results.find( resource.resourceUri() );
00067 if ( old == results.end() ) {
00068 results.insert( resource.resourceUri(), resource );
00069 }
00070 else {
00071
00072 Nepomuk::Search::Result& result = *old;
00073 result.setScore( result.score() + resource.score() );
00074 }
00075 }
00076
00077 void mergeInResults( QHash<QUrl, Nepomuk::Search::Result>& results, const QHash<QUrl, Nepomuk::Search::Result>& otherResults ) {
00078 for ( QHash<QUrl, Nepomuk::Search::Result>::const_iterator it = otherResults.constBegin();
00079 it != otherResults.constEnd(); ++it ) {
00080 mergeInResult( results, it.value() );
00081 }
00082 }
00083
00084
00085
00086 QString luceneQueryEscape( const QString& s ) {
00087
00088
00089 static QRegExp rx( "([\\-" + QRegExp::escape( "+&|!(){}[]^\"~:\\" ) + "])" );
00090 QString es( s );
00091 es.replace( rx, "\\\\1" );
00092 return es;
00093 }
00094
00095 QString luceneQueryEscape( const QUrl& s ) {
00096 return luceneQueryEscape( QString::fromAscii( s.toEncoded() ) );
00097 }
00098
00099 QString createLuceneLiteralQuery( const QString& escaped ) {
00100 if ( escaped.contains( QRegExp( "\\s" ) ) ) {
00101 return "\"" + escaped + "\"";
00102 }
00103 else {
00104 return escaped;
00105 }
00106 }
00107
00108 QString createLuceneQuery( const Nepomuk::Search::SearchNode& node ) {
00109 if ( node.term.type() == Nepomuk::Search::Term::LiteralTerm ) {
00110 return createLuceneLiteralQuery( luceneQueryEscape( node.term.value().toString() ) );
00111 }
00112 else if ( node.term.type() == Nepomuk::Search::Term::ComparisonTerm ) {
00113 return luceneQueryEscape( node.term.property() ) + ':' + createLuceneLiteralQuery( luceneQueryEscape( node.term.subTerms().first().value().toString() ) );
00114 }
00115 else {
00116 Q_ASSERT( node.term.type() == Nepomuk::Search::Term::AndTerm ||
00117 node.term.type() == Nepomuk::Search::Term::OrTerm );
00118
00119 QStringList sq;
00120 foreach( const Nepomuk::Search::SearchNode& n, node.subNodes ) {
00121 sq += createLuceneQuery( n );
00122 }
00123 if ( node.term.type() == Nepomuk::Search::Term::AndTerm ) {
00124 return " ( " + sq.join( " AND " ) + " ) ";
00125 }
00126 else {
00127 return " ( " + sq.join( " OR " ) + " ) ";
00128 }
00129 }
00130 }
00131
00132 QString comparatorString( Nepomuk::Search::Term::Comparator c ) {
00133 switch( c ) {
00134 case Nepomuk::Search::Term::Contains:
00135 return ":";
00136 case Nepomuk::Search::Term::Equal:
00137 return "=";
00138 case Nepomuk::Search::Term::Greater:
00139 return ">";
00140 case Nepomuk::Search::Term::Smaller:
00141 return "<";
00142 case Nepomuk::Search::Term::GreaterOrEqual:
00143 return ">=";
00144 case Nepomuk::Search::Term::SmallerOrEqual:
00145 return "<=";
00146 }
00147
00148 return QString();
00149 }
00150
00151
00152 bool isNumberLiteralValue( const Soprano::LiteralValue& value ) {
00153 return value.isInt() || value.isInt64() || value.isUnsignedInt() || value.isUnsignedInt64() || value.isDouble();
00154 }
00155
00156
00157 QString createGraphPattern( const Nepomuk::Search::SearchNode& node, int& varCnt, const QString& varName = QString( "?r" ) )
00158 {
00159 switch( node.term.type() ) {
00160 case Nepomuk::Search::Term::ComparisonTerm: {
00161
00162 Nepomuk::Search::Term subTerm( node.term.subTerms().first() );
00163
00164
00165
00166
00167
00168 if ( subTerm.type() == Nepomuk::Search::Term::ResourceTerm ||
00169 subTerm.type() == Nepomuk::Search::Term::LiteralTerm ) {
00170 if( node.term.comparator() != Nepomuk::Search::Term::Equal ) {
00171
00172
00173 QString filter = QString( "?var%1 %2 " )
00174 .arg( ++varCnt )
00175 .arg( comparatorString( node.term.comparator() ) );
00176 if ( isNumberLiteralValue( subTerm.value() ) ) {
00177 filter += subTerm.value().toString();
00178 }
00179 else {
00180 Nepomuk::Types::Property prop( node.term.property() );
00181 filter += QString( "\"%1\"" ).arg( subTerm.value().toString() );
00182 if ( prop.literalRangeType().dataTypeUri().isValid() )
00183 filter += QString( "^^<%1>" ).arg( prop.literalRangeType().dataTypeUri().toString() );
00184 }
00185
00186 return QString( "%1 <%2> ?var%3 . FILTER(%4) . " )
00187 .arg( varName )
00188 .arg( QString::fromAscii( node.term.property().toEncoded() ) )
00189 .arg( varCnt )
00190 .arg( filter );
00191 }
00192 else {
00193 if ( subTerm.type() == Nepomuk::Search::Term::ResourceTerm ) {
00194 return QString( "%1 <%2> <%3> . " )
00195 .arg( varName )
00196 .arg( QString::fromAscii( node.term.property().toEncoded() ) )
00197 .arg( QString::fromAscii( subTerm.resource().toEncoded() ) );
00198 }
00199 else if ( Nepomuk::Types::Property( node.term.property() ).range().isValid() ) {
00200 return QString( "%7 <%1> ?x . { ?x <%2> \"%3\"^^<%4> . } UNION { ?x <%5> \"%3\"^^<%4>. } UNION { ?x <%6> \"%3\"^^<%4> . }" )
00201 .arg( QString::fromAscii( node.term.property().toEncoded() ) )
00202 .arg( Soprano::Vocabulary::RDFS::label().toString() )
00203 .arg( subTerm.value().toString() )
00204 .arg( Soprano::Vocabulary::XMLSchema::string().toString() )
00205 .arg( Soprano::Vocabulary::NAO::prefLabel().toString() )
00206 .arg( Soprano::Vocabulary::NAO::identifier().toString() )
00207 .arg( varName );
00208 }
00209 else {
00210 return QString( "%1 <%2> \"%3\"^^<%4> . " )
00211 .arg( varName )
00212 .arg( QString::fromAscii( node.term.property().toEncoded() ) )
00213 .arg( subTerm.value().toString() )
00214 .arg( Nepomuk::Types::Property( node.term.property() ).literalRangeType().dataTypeUri().toString() );
00215 }
00216 }
00217 }
00218
00219
00220
00221
00222
00223 else {
00224 QString bridgeVarName = QString( "?var%1" ).arg( ++varCnt );
00225 return QString( "%1 <%2> %3 . " )
00226 .arg( varName )
00227 .arg( QString::fromAscii( node.term.property().toEncoded() ) )
00228 .arg( bridgeVarName )
00229 + createGraphPattern( node.subNodes.first(), varCnt, bridgeVarName );
00230 }
00231 }
00232
00233 case Nepomuk::Search::Term::AndTerm: {
00234 QString s( "{ " );
00235 foreach( const Nepomuk::Search::SearchNode& n, node.subNodes ) {
00236 s += createGraphPattern( n, varCnt );
00237 }
00238 s += "} ";
00239 return s;
00240 }
00241
00242 case Nepomuk::Search::Term::OrTerm: {
00243 QStringList s;
00244 foreach( const Nepomuk::Search::SearchNode& n, node.subNodes ) {
00245 s += createGraphPattern( n, varCnt );
00246 }
00247 Q_ASSERT( !s.isEmpty() );
00248 return "{ " + s.join( " } UNION { " ) + " } ";
00249 }
00250
00251 default:
00252 Q_ASSERT_X( 0, "createGraphPattern", "unsupported Term type" );
00253 }
00254
00255 return QString();
00256 }
00257 }
00258
00259
00260 Nepomuk::Search::SearchThread::SearchThread( QObject* parent )
00261 : QThread( parent )
00262 {
00263 }
00264
00265
00266 Nepomuk::Search::SearchThread::~SearchThread()
00267 {
00268 }
00269
00270
00271 void Nepomuk::Search::SearchThread::query( const Query& term, double cutOffScore )
00272 {
00273 if( isRunning() ) {
00274 cancel();
00275 }
00276
00277 kDebug() << term << cutOffScore;
00278
00279 m_canceled = false;
00280 m_searchTerm = term;
00281 m_cutOffScore = cutOffScore;
00282 m_numResults = 0;
00283
00284 start();
00285 }
00286
00287
00288 void Nepomuk::Search::SearchThread::cancel()
00289 {
00290 m_canceled = true;
00291 wait();
00292 }
00293
00294
00295 void Nepomuk::Search::SearchThread::run()
00296 {
00297 QTime time;
00298 time.start();
00299
00300 if ( m_searchTerm.type() == Query::PlainQuery ) {
00301 kDebug() << "Plain Query: " << m_searchTerm;
00302 Term t = resolveFields( m_searchTerm.term() );
00303 kDebug() << "Fields resolved:" << t;
00304 t = resolveValues( t );
00305 kDebug() << "Values resolved:" << t;
00306 t = optimize( t );
00307 kDebug() << "Optimized query:" << t;
00308
00309 search( splitLuceneSparql( t ) , 1.0, true );
00310 }
00311 else {
00312
00313
00314 QString query = m_searchTerm.sparqlQuery();
00315 int pos = query.indexOf( QLatin1String( "where" ) );
00316 if ( pos > 0 ) {
00317 query.insert( pos, buildRequestPropertyVariableList() + ' ' );
00318 pos = query.lastIndexOf( '}' );
00319 if ( pos > 0 ) {
00320 query.insert( pos, ' ' + buildRequestPropertyPatterns() + ' ' );
00321 }
00322 }
00323
00324 sparqlQuery( query, 1.0, true );
00325 }
00326
00327 kDebug() << time.elapsed();
00328 }
00329
00330
00331 Nepomuk::Search::Term Nepomuk::Search::SearchThread::resolveFields( const Term& term )
00332 {
00333 switch( term.type() ) {
00334 case Term::AndTerm:
00335 case Term::OrTerm: {
00336 Term newTerm;
00337 newTerm.setType( term.type() );
00338 QList<Term> terms = term.subTerms();
00339 foreach( const Term& t, terms ) {
00340 if ( m_canceled ) break;
00341 newTerm.addSubTerm( resolveFields( t ) );
00342 }
00343 return newTerm;
00344 }
00345
00346
00347 case Term::ComparisonTerm: {
00348 Term newTerm( term );
00349 Term subTerm = term.subTerms().first();
00350 if ( subTerm.type() != Term::LiteralTerm &&
00351 subTerm.type() != Term::ResourceTerm ) {
00352 newTerm.setSubTerms( QList<Term>() << resolveFields( subTerm ) );
00353 }
00354
00355 if ( !newTerm.property().isValid() ) {
00356
00357 QList<QUrl> properties = matchFieldName( term.field() );
00358 if ( properties.count() > 0 ) {
00359 if ( properties.count() == 1 ) {
00360 newTerm.setProperty( properties.first() );
00361 return newTerm;
00362 }
00363 else {
00364 Term orTerm;
00365 orTerm.setType( Term::OrTerm );
00366 foreach( const QUrl& property, properties ) {
00367 Term t( newTerm );
00368 t.setProperty( property );
00369 orTerm.addSubTerm( t );
00370 }
00371 return orTerm;
00372 }
00373 }
00374 else {
00375 kDebug() << "Failed to resolve field" << term.field() << "to any property!";
00376 return Term();
00377 }
00378 }
00379 }
00380
00381 default:
00382 return term;
00383 }
00384 }
00385
00386
00387
00388 Nepomuk::Search::Term Nepomuk::Search::SearchThread::resolveValues( const Term& term )
00389 {
00390 switch( term.type() ) {
00391 case Term::AndTerm:
00392 case Term::OrTerm: {
00393 Term newTerm;
00394 newTerm.setType( term.type() );
00395 QList<Term> terms = term.subTerms();
00396 foreach( const Term& t, terms ) {
00397 if ( m_canceled ) break;
00398 newTerm.addSubTerm( resolveValues( t ) );
00399 }
00400 return newTerm;
00401 }
00402
00403
00404 case Term::ComparisonTerm: {
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419 if ( term.comparator() == Term::Contains &&
00420 term.subTerms().first().type() == Term::LiteralTerm ) {
00421
00422 Q_ASSERT ( term.property().isValid() );
00423
00424
00425
00426
00427 Nepomuk::Types::Property prop( term.property() );
00428 if ( prop.range().isValid() ) {
00429
00430 Term orTerm;
00431 orTerm.setType( Term::OrTerm );
00432
00433
00434
00435
00436
00437
00438
00439 QString query = QString( "%1:\"%2\" OR %3:\"%2\" OR %4:\"%2\"" )
00440 .arg( luceneQueryEscape( Soprano::Vocabulary::RDFS::label() ) )
00441 .arg( term.subTerms().first().value().toString() )
00442 .arg( luceneQueryEscape( Soprano::Vocabulary::NAO::prefLabel() ) )
00443 .arg( luceneQueryEscape( Soprano::Vocabulary::NAO::identifier() ) );
00444 Soprano::QueryResultIterator hits = ResourceManager::instance()->mainModel()->executeQuery( query,
00445 Soprano::Query::QueryLanguageUser,
00446 "lucene" );
00447
00448 while ( hits.next() ) {
00449 if ( m_canceled ) break;
00450
00451
00452 QUrl hit = hits.binding( 0 ).uri();
00453 if ( prop.range().uri() == Soprano::Vocabulary::RDFS::Resource() ||
00454 Nepomuk::Resource( hit ).hasType( prop.range().uri() ) ) {
00455 orTerm.addSubTerm( Term( term.property(), hit ) );
00456 if ( orTerm.subTerms().count() == MAX_RESOURCES ) {
00457 break;
00458 }
00459 }
00460 }
00461
00462 if ( orTerm.subTerms().count() == 1 ) {
00463 return orTerm.subTerms().first();
00464 }
00465 else if ( orTerm.subTerms().count() ) {
00466 return orTerm;
00467 }
00468 else {
00469 kDebug() << "Failed to match value" << term.subTerms().first().value() << "to any possible resource.";
00470 return term;
00471 }
00472 }
00473 else {
00474
00475 return term;
00476 }
00477 }
00478
00479
00480 else {
00481 Term newTerm( term );
00482 newTerm.setSubTerms( QList<Term>() << resolveValues( term.subTerms().first() ) );
00483 return newTerm;
00484 }
00485 }
00486
00487 default:
00488 return term;
00489 }
00490 }
00491
00492
00493 Nepomuk::Search::Term Nepomuk::Search::SearchThread::optimize( const Term& term )
00494 {
00495 switch( term.type() ) {
00496 case Term::AndTerm:
00497 case Term::OrTerm: {
00498 QList<Term> subTerms = term.subTerms();
00499 QList<Term> newSubTerms;
00500 QList<Term>::const_iterator end( subTerms.constEnd() );
00501 for ( QList<Term>::const_iterator it = subTerms.constBegin();
00502 it != end; ++it ) {
00503 const Term& t = *it;
00504 Term ot = optimize( t );
00505 if ( ot.type() == term.type() ) {
00506 newSubTerms += ot.subTerms();
00507 }
00508 else {
00509 newSubTerms += ot;
00510 }
00511 }
00512 Term newTerm;
00513 newTerm.setType( term.type() );
00514 newTerm.setSubTerms( newSubTerms );
00515 return newTerm;
00516 }
00517
00518 default:
00519 return term;
00520 }
00521 }
00522
00523
00524 Nepomuk::Search::SearchNode Nepomuk::Search::SearchThread::splitLuceneSparql( const Term& term )
00525 {
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535 switch( term.type() ) {
00536 case Term::LiteralTerm:
00537 return SearchNode( term, SearchNode::Lucene );
00538
00539 case Term::ComparisonTerm:
00540 if ( term.comparator() == Term::Contains &&
00541 term.subTerms().first().type() == Term::LiteralTerm ) {
00542
00543 return SearchNode( term, SearchNode::Lucene );
00544 }
00545 else {
00546
00547 SearchNode node( term, SearchNode::Sparql );
00548 node.subNodes += splitLuceneSparql( term.subTerms().first() );
00549 return node;
00550 }
00551
00552 case Term::AndTerm:
00553 case Term::OrTerm: {
00554 QList<Term> subTerms = term.subTerms();
00555 QList<SearchNode> luceneNodes, sparqlNodes, unknownNodes;
00556
00557 QList<Term>::const_iterator end( subTerms.constEnd() );
00558 for ( QList<Term>::const_iterator it = subTerms.constBegin();
00559 it != end; ++it ) {
00560 SearchNode node = splitLuceneSparql( *it );
00561 if ( node.type == SearchNode::Lucene ) {
00562 luceneNodes += node;
00563 }
00564 else if ( node.type == SearchNode::Sparql ) {
00565 sparqlNodes += node;
00566 }
00567 else {
00568 unknownNodes += node;
00569 }
00570 }
00571
00572 if ( luceneNodes.count() && !sparqlNodes.count() && !unknownNodes.count() ) {
00573 return SearchNode( term, SearchNode::Lucene, luceneNodes );
00574 }
00575 else if ( !luceneNodes.count() && sparqlNodes.count() && !unknownNodes.count() ) {
00576 return SearchNode( term, SearchNode::Sparql, sparqlNodes );
00577 }
00578 else if ( !luceneNodes.count() && !sparqlNodes.count() && unknownNodes.count() ) {
00579 return SearchNode( term, SearchNode::Unknown, unknownNodes );
00580 }
00581 else {
00582 Term newTerm;
00583 newTerm.setType( term.type() );
00584 SearchNode andNode( newTerm );
00585 if ( luceneNodes.count() )
00586 andNode.subNodes += SearchNode( term, SearchNode::Lucene, luceneNodes );
00587 if ( sparqlNodes.count() )
00588 andNode.subNodes += SearchNode( term, SearchNode::Sparql, sparqlNodes );
00589 if ( unknownNodes.count() )
00590 andNode.subNodes += SearchNode( term, SearchNode::Unknown, unknownNodes );
00591 return andNode;
00592 }
00593 }
00594
00595 default:
00596
00597 return SearchNode( Term() );
00598 }
00599 }
00600
00601
00602 QHash<QUrl, Nepomuk::Search::Result> Nepomuk::Search::SearchThread::search( const SearchNode& node, double baseScore, bool reportResults )
00603 {
00604 if ( node.type == SearchNode::Lucene ) {
00605 return luceneQuery( createLuceneQuery( node ), baseScore, reportResults );
00606 }
00607 else if ( node.type == SearchNode::Sparql ) {
00608 return sparqlQuery( createSparqlQuery( node ), baseScore, reportResults );
00609 }
00610 else if ( node.term.type() == Term::AndTerm ) {
00611 return andSearch( node.subNodes, baseScore, reportResults );
00612 }
00613 else {
00614 return orSearch( node.subNodes, baseScore, reportResults );
00615 }
00616 }
00617
00618
00619 QHash<QUrl, Nepomuk::Search::Result> Nepomuk::Search::SearchThread::andSearch( const QList<SearchNode>& nodes, double baseScore, bool reportResults )
00620 {
00621 QHash<QUrl, Result> results;
00622 bool first = true;
00623 foreach( const SearchNode& node, nodes ) {
00624 if ( m_canceled ) break;
00625
00626
00627 QHash<QUrl, Result> termResults = search( node, baseScore, false );
00628 if ( first ) {
00629 results = termResults;
00630 first = false;
00631 }
00632 else {
00633
00634
00635 QHash<QUrl, Result>::iterator it = results.begin();
00636 while ( it != results.end() ) {
00637 if ( m_canceled ) break;
00638 QHash<QUrl, Result>::const_iterator termIt = termResults.constFind( it.key() );
00639 if ( termIt != termResults.constEnd() ) {
00640
00641 it.value().setScore( it.value().score() + termIt.value().score() );
00642 ++it;
00643 }
00644 else {
00645 it = results.erase( it );
00646 }
00647 }
00648 }
00649 }
00650
00651 if ( reportResults ) {
00652 for ( QHash<QUrl, Result>::const_iterator it = results.constBegin();
00653 it != results.constEnd(); ++it ) {
00654 if ( m_canceled ) break;
00655 if ( m_searchTerm.limit() > 0 && m_numResults >= m_searchTerm.limit() ) {
00656 return results;
00657 }
00658 else {
00659 ++m_numResults;
00660 emit newResult( it.value() );
00661 }
00662 }
00663 }
00664
00665 return results;
00666 }
00667
00668
00669 QHash<QUrl, Nepomuk::Search::Result> Nepomuk::Search::SearchThread::orSearch( const QList<SearchNode>& nodes, double baseScore, bool reportResults )
00670 {
00671 QHash<QUrl, Result> results;
00672 foreach( const SearchNode& node, nodes ) {
00673 if ( m_canceled ) break;
00674
00675 mergeInResults( results, search( node, baseScore, reportResults ) );
00676 }
00677 if ( reportResults ) {
00678 for ( QHash<QUrl, Result>::const_iterator it = results.constBegin();
00679 it != results.constEnd(); ++it ) {
00680 if ( m_canceled ) break;
00681 if ( m_searchTerm.limit() > 0 && m_numResults >= m_searchTerm.limit() ) {
00682 return results;
00683 }
00684 else {
00685 ++m_numResults;
00686 emit newResult( it.value() );
00687 }
00688 }
00689 }
00690 return results;
00691 }
00692
00693
00694 QList<QUrl> Nepomuk::Search::SearchThread::matchFieldName( const QString& field )
00695 {
00696 kDebug() << field;
00697
00698 QList<QUrl> results;
00699
00700
00701
00702 QString query = QString( "select distinct ?p where { "
00703 "?p <%1> <%2> . "
00704 "?p <%3> \"%4\"^^<%5> . "
00705 "?x ?p ?y . }" )
00706 .arg( Soprano::Vocabulary::RDF::type().toString() )
00707 .arg( Soprano::Vocabulary::RDF::Property().toString() )
00708 .arg( Soprano::Vocabulary::RDFS::label().toString() )
00709 .arg( field )
00710 .arg( Soprano::Vocabulary::XMLSchema::string().toString() );
00711 kDebug() << "Direct match query:" << query;
00712
00713 Soprano::QueryResultIterator labelHits = ResourceManager::instance()->mainModel()->executeQuery( query,
00714 Soprano::Query::QueryLanguageSparql );
00715 if ( !m_canceled ) {
00716 while ( labelHits.next() ) {
00717 results << labelHits.binding( "p" ).uri();
00718 kDebug() << "Found direct match" << labelHits.binding( "p" ).uri();
00719 }
00720
00721 if ( results.isEmpty() ) {
00722
00723
00724
00725
00726
00727
00728
00729 query = QString( "select ?p where { "
00730 "?p <%1> <%2> . "
00731 "?p <%3> ?label . "
00732 "FILTER(REGEX(STR(?label),'%4','i')) . }" )
00733 .arg( Soprano::Vocabulary::RDF::type().toString() )
00734 .arg( Soprano::Vocabulary::RDF::Property().toString() )
00735 .arg( Soprano::Vocabulary::RDFS::label().toString() )
00736 .arg( field );
00737 kDebug() << "Indirect hit query:" << query;
00738 labelHits = ResourceManager::instance()->mainModel()->executeQuery( query,
00739 Soprano::Query::QueryLanguageSparql );
00740 QString newQuery;
00741 while ( labelHits.next() ) {
00742 results << labelHits.binding( "p" ).uri();
00743 kDebug() << "Found indirect match by label" << labelHits.binding( "p" ).uri();
00744 }
00745 }
00746
00747
00748 if ( results.isEmpty() ) {
00749 query = QString( "select ?p where { "
00750 "?p <%1> <%2> . "
00751 "FILTER(REGEX(STR(?p),'%3','i')) . }" )
00752 .arg( Soprano::Vocabulary::RDF::type().toString() )
00753 .arg( Soprano::Vocabulary::RDF::Property().toString() )
00754 .arg( field );
00755 kDebug() << "Indirect hit query:" << query;
00756 labelHits = ResourceManager::instance()->mainModel()->executeQuery( query,
00757 Soprano::Query::QueryLanguageSparql );
00758 QString newQuery;
00759 while ( labelHits.next() ) {
00760 results << labelHits.binding( "p" ).uri();
00761 kDebug() << "Found indirect match by name" << labelHits.binding( "p" ).uri();
00762 }
00763 }
00764 }
00765
00766 return results;
00767 }
00768
00769
00770 QString Nepomuk::Search::SearchThread::createSparqlQuery( const Nepomuk::Search::SearchNode& node )
00771 {
00772 int varCnt = 0;
00773 return QString( "select distinct ?r %1 where { graph ?g { ?r a ?type . } . ?g a <%2> . %3 %4 }" )
00774 .arg( buildRequestPropertyVariableList() )
00775 .arg( Soprano::Vocabulary::NRL::InstanceBase().toString() )
00776 .arg( createGraphPattern( node, varCnt ) )
00777 .arg( buildRequestPropertyPatterns() );
00778 }
00779
00780
00781 QHash<QUrl, Nepomuk::Search::Result> Nepomuk::Search::SearchThread::sparqlQuery( const QString& query, double baseScore, bool reportResults )
00782 {
00783 kDebug() << query;
00784
00785 QHash<QUrl, Result> results;
00786
00787 Soprano::QueryResultIterator hits = ResourceManager::instance()->mainModel()->executeQuery( query, Soprano::Query::QueryLanguageSparql );
00788 while ( hits.next() ) {
00789 if ( m_canceled ) break;
00790
00791 Result result = extractResult( hits );
00792 result.setScore( baseScore );
00793
00794 kDebug() << "Found result:" << result.resourceUri();
00795
00796
00797 if ( reportResults ) {
00798 if ( m_searchTerm.limit() > 0 && m_numResults >= m_searchTerm.limit() ) {
00799 return results;
00800 }
00801 else {
00802 ++m_numResults;
00803 emit newResult( result );
00804 }
00805 }
00806
00807 results.insert( result.resourceUri(), result );
00808 }
00809
00810 return results;
00811 }
00812
00813
00814 QHash<QUrl, Nepomuk::Search::Result> Nepomuk::Search::SearchThread::luceneQuery( const QString& query, double baseScore, bool reportResults )
00815 {
00816 QString finalQuery( query );
00817
00818
00819
00820
00821
00822
00823 #if SOPRANO_IS_VERSION(2,1,64)
00824 finalQuery += QString(" AND NOT %1:%2 AND NOT %1:%3 AND NOT %1:%4 AND NOT %1:%5 AND NOT %1:%6 AND NOT %1:%7")
00825 .arg( luceneQueryEscape(Soprano::Vocabulary::RDF::type()) )
00826 .arg( luceneQueryEscape(Soprano::Vocabulary::RDF::Property()) )
00827 .arg( luceneQueryEscape(Soprano::Vocabulary::RDFS::Class()) )
00828 .arg( luceneQueryEscape(Soprano::Vocabulary::OWL::Class()) )
00829 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::InstanceBase()) )
00830 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::Ontology()) )
00831 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::KnowledgeBase()) );
00832 #endif
00833
00834 kDebug() << finalQuery;
00835
00836 Soprano::QueryResultIterator hits = ResourceManager::instance()->mainModel()->executeQuery( finalQuery,
00837 Soprano::Query::QueryLanguageUser,
00838 "lucene" );
00839 QHash<QUrl, Result> results;
00840
00841 while ( hits.next() ) {
00842 if ( m_canceled ) break;
00843
00844 QUrl hitUri = hits.binding( 0 ).uri();
00845 double hitScore = hits.binding( 1 ).literal().toDouble() * baseScore;
00846
00847 if ( hitScore >= cutOffScore() ) {
00848 Result result( hitUri, hitScore );
00849
00850 if ( !m_searchTerm.requestProperties().isEmpty() ) {
00851
00852 fetchRequestPropertiesForResource( result );
00853 }
00854
00855
00856 if ( reportResults ) {
00857 if ( m_searchTerm.limit() > 0 && m_numResults >= m_searchTerm.limit() ) {
00858 return results;
00859 }
00860 else {
00861 ++m_numResults;
00862 kDebug() << "direct hit:" << hitUri << hitScore;
00863 emit newResult( result );
00864 }
00865 }
00866
00867 results.insert( hitUri, result );
00868 }
00869 else {
00870 kDebug() << "Score too low:" << hitUri << hitScore;
00871 }
00872 }
00873
00874 return results;
00875 }
00876
00877
00878 QString Nepomuk::Search::SearchThread::buildRequestPropertyVariableList() const
00879 {
00880 int numRequestProperties = m_searchTerm.requestProperties().count();
00881 QString s;
00882 for ( int i = 1; i <= numRequestProperties; ++i ) {
00883 s += QString( "?reqProp%1 " ).arg( i );
00884 }
00885 return s;
00886 }
00887
00888
00889 QString Nepomuk::Search::SearchThread::buildRequestPropertyPatterns() const
00890 {
00891 QList<Query::RequestProperty> requestProperties = m_searchTerm.requestProperties();
00892 QString s;
00893 int i = 1;
00894 foreach ( const Query::RequestProperty& rp, requestProperties ) {
00895 if ( rp.second ) {
00896 s += "OPTIONAL { ";
00897 }
00898
00899 s += QString( "?r <%1> ?reqProp%2 . " ).arg( QString::fromAscii( rp.first.toEncoded() ) ).arg( i++ );
00900
00901 if ( rp.second ) {
00902 s += "} ";
00903 }
00904 }
00905 return s;
00906 }
00907
00908
00909 Nepomuk::Search::Result Nepomuk::Search::SearchThread::extractResult( const Soprano::QueryResultIterator& it ) const
00910 {
00911 Result result( it.binding( 0 ).uri() );
00912
00913 int i = 1;
00914 QList<Query::RequestProperty> requestProperties = m_searchTerm.requestProperties();
00915 foreach ( const Query::RequestProperty& rp, requestProperties ) {
00916 result.addRequestProperty( rp.first, it.binding( QString("reqProp%1").arg( i++ ) ) );
00917 }
00918
00919
00920 return result;
00921 }
00922
00923
00924 void Nepomuk::Search::SearchThread::fetchRequestPropertiesForResource( Result& result )
00925 {
00926 QString q = QString( "select distinct %1 where { %2 }" )
00927 .arg( buildRequestPropertyVariableList() )
00928 .arg( buildRequestPropertyPatterns().replace( "?r ", '<' + QString::fromAscii( result.resourceUri().toEncoded() ) + "> " ) );
00929 kDebug() << q;
00930 Soprano::QueryResultIterator reqPropHits = ResourceManager::instance()->mainModel()->executeQuery( q, Soprano::Query::QueryLanguageSparql );
00931 if ( reqPropHits.next() ) {
00932 int i = 1;
00933 QList<Query::RequestProperty> requestProperties = m_searchTerm.requestProperties();
00934 foreach ( const Query::RequestProperty& rp, requestProperties ) {
00935 result.addRequestProperty( rp.first, reqPropHits.binding( QString("reqProp%1").arg( i++ ) ) );
00936 }
00937 }
00938 }
00939
00940 #include "searchthread.moc"