• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • API Reference
  • Sitemap
  • Contact Us
 

NepomukDaemons

indexscheduler.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE Project
00002    Copyright (c) 2008 Sebastian Trueg <trueg@kde.org>
00003 
00004    Parts of this file are based on code from Strigi
00005    Copyright (C) 2006-2007 Jos van den Oever <jos@vandenoever.info>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Library General Public
00009    License version 2 as published by the Free Software Foundation.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019    Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include "indexscheduler.h"
00023 #include "config.h"
00024 
00025 #include <QtCore/QMutexLocker>
00026 #include <QtCore/QList>
00027 #include <QtCore/QFile>
00028 #include <QtCore/QFileInfo>
00029 #include <QtCore/QDirIterator>
00030 #include <QtCore/QDateTime>
00031 #include <QtCore/QByteArray>
00032 #include <QtCore/QUrl>
00033 
00034 #include <KDebug>
00035 #include <KTemporaryFile>
00036 
00037 #include <map>
00038 #include <vector>
00039 
00040 #include <strigi/strigiconfig.h>
00041 #include <strigi/indexwriter.h>
00042 #include <strigi/indexmanager.h>
00043 #include <strigi/indexreader.h>
00044 #include <strigi/analysisresult.h>
00045 #include <strigi/fileinputstream.h>
00046 #include <strigi/analyzerconfiguration.h>
00047 
00048 
00049 // FIXME: remove all files from the datastore which are in folders not supposed to be indexed
00050 
00051 class StoppableConfiguration : public Strigi::AnalyzerConfiguration {
00052 public:
00053     StoppableConfiguration()
00054         : m_stop(false) {
00055 #if defined(STRIGI_IS_VERSION)
00056 #if STRIGI_IS_VERSION( 0, 6, 1 )
00057         setIndexArchiveContents( false );
00058 #endif
00059 #endif
00060     }
00061 
00062     bool indexMore() const {
00063         return !m_stop;
00064     }
00065 
00066     bool addMoreText() const {
00067         return !m_stop;
00068     }
00069 
00070     void setStop( bool s ) {
00071         m_stop = s;
00072     }
00073 
00074 private:
00075     bool m_stop;
00076 };
00077 
00078 
namespace {
    /**
     * Bit flags stored next to each folder queued in m_dirsToUpdate.
     */
    enum UpdateDirFlags {
        /// No flag set.
        NoUpdateFlags = 0,

        /// The folder is updated including its sub folders
        /// (handed to updateDir() as its "recursive" argument).
        UpdateRecursive = 1 << 0,

        /// The folder was queued from the configured folder list;
        /// updateAll() drops such entries before queueing the list again.
        AutoUpdateFolder = 1 << 1
    };
}
00098 
00099 
00100 Nepomuk::IndexScheduler::IndexScheduler( Strigi::IndexManager* manager, QObject* parent )
00101     : QThread( parent ),
00102       m_suspended( false ),
00103       m_stopped( false ),
00104       m_indexing( false ),
00105       m_indexManager( manager )
00106 {
00107     m_analyzerConfig = new StoppableConfiguration;
00108 
00109     connect( Config::self(), SIGNAL( configChanged() ),
00110              this, SLOT( readConfig() ) );
00111 }
00112 
00113 
00114 Nepomuk::IndexScheduler::~IndexScheduler()
00115 {
00116     delete m_analyzerConfig;
00117 }
00118 
00119 
00120 void Nepomuk::IndexScheduler::suspend()
00121 {
00122     if ( isRunning() ) {
00123         QMutexLocker locker( &m_resumeStopMutex );
00124         m_suspended = true;
00125     }
00126 }
00127 
00128 
00129 void Nepomuk::IndexScheduler::resume()
00130 {
00131     if ( isRunning() ) {
00132         QMutexLocker locker( &m_resumeStopMutex );
00133         m_suspended = false;
00134         m_resumeStopWc.wakeAll();
00135     }
00136 }
00137 
00138 
00139 void Nepomuk::IndexScheduler::setSuspended( bool suspended )
00140 {
00141     if ( suspended )
00142         suspend();
00143     else
00144         resume();
00145 }
00146 
00147 
00148 void Nepomuk::IndexScheduler::stop()
00149 {
00150     if ( isRunning() ) {
00151         QMutexLocker locker( &m_resumeStopMutex );
00152         m_stopped = true;
00153         m_suspended = false;
00154         m_analyzerConfig->setStop( true );
00155         m_dirsToUpdateWc.wakeAll();
00156         m_resumeStopWc.wakeAll();
00157     }
00158 }
00159 
00160 
00161 bool Nepomuk::IndexScheduler::isSuspended() const
00162 {
00163     return isRunning() && m_suspended;
00164 }
00165 
00166 
00167 bool Nepomuk::IndexScheduler::isIndexing() const
00168 {
00169     return m_indexing;
00170 }
00171 
00172 
00173 QString Nepomuk::IndexScheduler::currentFolder() const
00174 {
00175     return m_currentFolder;
00176 }
00177 
00178 
00179 void Nepomuk::IndexScheduler::setIndexingStarted( bool started )
00180 {
00181     if ( started != m_indexing ) {
00182         m_indexing = started;
00183         if ( m_indexing )
00184             emit indexingStarted();
00185         else
00186             emit indexingStopped();
00187     }
00188 }
00189 
00190 
00191 void Nepomuk::IndexScheduler::run()
00192 {
00193     // set lowest priority for this thread
00194     setPriority( QThread::IdlePriority );
00195 
00196     // initialization
00197     m_suspended = false;
00198     m_stopped = false;
00199     m_analyzerConfig->setStop( false );
00200     readConfig();
00201 
00202     Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
00203     analyzer.setIndexWriter( *m_indexManager->indexWriter() );
00204 
00205     setIndexingStarted( true );
00206 
00207     // do the actual indexing
00208     m_dirsToUpdate.clear();
00209     foreach( const QString& f, Config::self()->folders() )
00210         m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
00211 
00212     while ( 1 ) {
00213         // wait for more dirs to analyze in case the initial
00214         // indexing is done
00215         if ( m_dirsToUpdate.isEmpty() ) {
00216             setIndexingStarted( false );
00217 
00218             m_dirsToUpdateMutex.lock();
00219             m_dirsToUpdateWc.wait( &m_dirsToUpdateMutex );
00220             m_dirsToUpdateMutex.unlock();
00221 
00222             if ( !m_stopped )
00223                 setIndexingStarted( true );
00224         }
00225 
00226         // wait for resume or stop (or simply continue)
00227         if ( !waitForContinue() ) {
00228             break;
00229         }
00230 
00231         // get the next folder
00232         m_dirsToUpdateMutex.lock();
00233         QPair<QString, int> dir = *m_dirsToUpdate.begin();
00234         m_dirsToUpdate.erase( m_dirsToUpdate.begin() );
00235         m_dirsToUpdateMutex.unlock();
00236 
00237         // update until stopped
00238         if ( !updateDir( dir.first, &analyzer, dir.second & UpdateRecursive ) ) {
00239             break;
00240         }
00241         m_currentFolder.clear();
00242     }
00243 
00244     setIndexingStarted( false );
00245 }
00246 
00247 
00248 // this method should be thread-safe ("should" because of the indexreader and -writer)
00249 bool Nepomuk::IndexScheduler::updateDir( const QString& dir, Strigi::StreamAnalyzer* analyzer, bool recursive )
00250 {
00251 //    kDebug() << dir << analyzer << recursive;
00252 
00253     // inform interested clients
00254     emit indexingFolder( dir );
00255 
00256     m_currentFolder = dir;
00257 
00258     // get a map of all indexed files from the dir including their stored mtime
00259     std::map<std::string, time_t> filesInStore;
00260     m_indexManager->indexReader()->getChildren( QFile::encodeName( dir ).data(), filesInStore );
00261     std::map<std::string, time_t>::const_iterator filesInStoreEnd = filesInStore.end();
00262 
00263     QList<QFileInfo> filesToIndex;
00264     QList<QString> subFolders;
00265     std::vector<std::string> filesToDelete;
00266 
00267     // iterate over all files in the dir
00268     // and select the ones we need to add or delete from the store
00269     QDirIterator dirIt( dir, QDir::NoDotAndDotDot|QDir::Readable|QDir::Files|QDir::Dirs );
00270     while ( dirIt.hasNext() ) {
00271         QString path = dirIt.next();
00272 
00273         QFileInfo fileInfo = dirIt.fileInfo();
00274 
00275         bool indexFile = m_analyzerConfig->indexFile( QFile::encodeName( path ), QFile::encodeName( fileInfo.fileName() ) );
00276 
00277         // check if this file is new by looking it up in the store
00278         std::map<std::string, time_t>::iterator filesInStoreIt = filesInStore.find( QFile::encodeName( path ).data() );
00279         bool newFile = ( filesInStoreIt == filesInStoreEnd );
00280 
00281         // do we need to update? Did the file change?
00282         bool fileChanged = !newFile && fileInfo.lastModified().toTime_t() != filesInStoreIt->second;
00283 
00284         if ( indexFile && ( newFile || fileChanged ) )
00285             filesToIndex << fileInfo;
00286 
00287         if ( !newFile && ( fileChanged || !indexFile ) )
00288             filesToDelete.push_back( filesInStoreIt->first );
00289 
00290         // cleanup a bit for faster lookups
00291         if ( !newFile )
00292             filesInStore.erase( filesInStoreIt );
00293 
00294         if ( indexFile && recursive && fileInfo.isDir() && !fileInfo.isSymLink() )
00295             subFolders << path;
00296     }
00297 
00298     // all the files left in filesInStore are not in the current
00299     // directory and should be deleted
00300     for ( std::map<std::string, time_t>::const_iterator it = filesInStore.begin();
00301           it != filesInStoreEnd; ++it ) {
00302         filesToDelete.push_back( it->first );
00303     }
00304 
00305     // remove all files that need updating or have been removed
00306     m_indexManager->indexWriter()->deleteEntries( filesToDelete );
00307 
00308     // analyze all files that are new or need updating
00309     foreach( const QFileInfo& file, filesToIndex ) {
00310 
00311         // wait if we are suspended or return if we are stopped
00312         if ( !waitForContinue() )
00313             return false;
00314 
00315         analyzeFile( file, analyzer );
00316     }
00317 
00318     // recurse into subdirs (we do this in a separate loop to always keep a proper state:
00319     // compare m_currentFolder)
00320     if ( recursive ) {
00321         foreach( const QString& folder, subFolders ) {
00322             if ( !Config::self()->excludeFolders().contains( folder ) &&
00323                  !updateDir( folder, analyzer, true ) )
00324                 return false;
00325         }
00326     }
00327 
00328     return true;
00329 }
00330 
00331 
00332 void Nepomuk::IndexScheduler::analyzeFile( const QFileInfo& file, Strigi::StreamAnalyzer* analyzer )
00333 {
00334 //    kDebug() << file.filePath();
00335 
00336     Strigi::AnalysisResult analysisresult( QFile::encodeName( file.filePath() ).data(),
00337                                            file.lastModified().toTime_t(),
00338                                            *m_indexManager->indexWriter(),
00339                                            *analyzer,
00340                                            QFile::encodeName( file.path() ).data() );
00341     if ( file.isFile() && !file.isSymLink() ) {
00342         Strigi::FileInputStream stream( QFile::encodeName( file.filePath() ) );
00343         analysisresult.index( &stream );
00344     }
00345     else {
00346         analysisresult.index(0);
00347     }
00348 }
00349 
00350 
00351 bool Nepomuk::IndexScheduler::waitForContinue()
00352 {
00353     QMutexLocker locker( &m_resumeStopMutex );
00354     if ( m_suspended ) {
00355         setIndexingStarted( false );
00356         m_resumeStopWc.wait( &m_resumeStopMutex );
00357         setIndexingStarted( true );
00358     }
00359 
00360     return !m_stopped;
00361 }
00362 
00363 
00364 void Nepomuk::IndexScheduler::updateDir( const QString& path )
00365 {
00366     QMutexLocker lock( &m_dirsToUpdateMutex );
00367     m_dirsToUpdate << qMakePair( path, ( int )NoUpdateFlags );
00368     m_dirsToUpdateWc.wakeAll();
00369 }
00370 
00371 
00372 void Nepomuk::IndexScheduler::updateAll()
00373 {
00374     QMutexLocker lock( &m_dirsToUpdateMutex );
00375 
00376     // remove previously added folders to not index stuff we are not supposed to
00377     // (FIXME: this does not include currently being indexed folders)
00378     QSet<QPair<QString, int> >::iterator it = m_dirsToUpdate.begin();
00379     while ( it != m_dirsToUpdate.end() ) {
00380         if ( it->second & AutoUpdateFolder )
00381             it = m_dirsToUpdate.erase( it );
00382         else
00383             ++it;
00384     }
00385 
00386     // update everything again in case the folders changed
00387     foreach( const QString& f, Config::self()->folders() )
00388         m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
00389 
00390     m_dirsToUpdateWc.wakeAll();
00391 }
00392 
00393 
00394 void Nepomuk::IndexScheduler::readConfig()
00395 {
00396     // load Strigi configuration
00397     std::vector<std::pair<bool, std::string> > filters;
00398     QStringList excludeFilters = Config::self()->excludeFilters();
00399     QStringList includeFilters = Config::self()->includeFilters();
00400     foreach( const QString& filter, excludeFilters ) {
00401         filters.push_back( std::make_pair<bool, std::string>( false, filter.toUtf8().data() ) );
00402     }
00403     foreach( const QString& filter, includeFilters ) {
00404         filters.push_back( std::make_pair<bool, std::string>( true, filter.toUtf8().data() ) );
00405     }
00406     m_analyzerConfig->setFilters(filters);
00407     updateAll();
00408 }
00409 
00410 
00411 namespace {
00412     class QDataStreamStrigiBufferedStream : public Strigi::BufferedStream<char>
00413     {
00414     public:
00415         QDataStreamStrigiBufferedStream( QDataStream& stream )
00416             : m_stream( stream ) {
00417         }
00418 
00419         int32_t fillBuffer( char* start, int32_t space ) {
00420             int r = m_stream.readRawData( start, space );
00421             if ( r == 0 ) {
00422                 // Strigi's API is so weird!
00423                 return -1;
00424             }
00425             else if ( r < 0 ) {
00426                 // Again: weird API. m_status is a protected member of StreamBaseBase (yes, 2x Base)
00427                 m_status = Strigi::Error;
00428                 return -1;
00429             }
00430             else {
00431                 return r;
00432             }
00433         }
00434 
00435     private:
00436         QDataStream& m_stream;
00437     };
00438 }
00439 
00440 
00441 void Nepomuk::IndexScheduler::analyzeResource( const QUrl& uri, const QDateTime& modificationTime, QDataStream& data )
00442 {
00443     QDateTime existingMTime = QDateTime::fromTime_t( m_indexManager->indexReader()->mTime( uri.toEncoded().data() ) );
00444     if ( existingMTime < modificationTime ) {
00445         // remove the old data
00446         std::vector<std::string> entries;
00447         entries.push_back( uri.toEncoded().data() );
00448         m_indexManager->indexWriter()->deleteEntries( entries );
00449 
00450         // create the new
00451         Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
00452         analyzer.setIndexWriter( *m_indexManager->indexWriter() );
00453         Strigi::AnalysisResult analysisresult( uri.toEncoded().data(),
00454                                                modificationTime.toTime_t(),
00455                                                *m_indexManager->indexWriter(),
00456                                                analyzer );
00457         QDataStreamStrigiBufferedStream stream( data );
00458         analysisresult.index( &stream );
00459     }
00460     else {
00461         kDebug() << uri << "up to date";
00462     }
00463 }
00464 
00465 #include "indexscheduler.moc"

NepomukDaemons

Skip menu "NepomukDaemons"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

API Reference

Skip menu "API Reference"
  • KCMShell
  • KNotify
  • KStyles
  • Nepomuk Daemons
Generated for API Reference by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal