00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "indexscheduler.h"
00023 #include "config.h"
00024
00025 #include <QtCore/QMutexLocker>
00026 #include <QtCore/QList>
00027 #include <QtCore/QFile>
00028 #include <QtCore/QFileInfo>
00029 #include <QtCore/QDirIterator>
00030 #include <QtCore/QDateTime>
00031 #include <QtCore/QByteArray>
00032 #include <QtCore/QUrl>
00033
00034 #include <KDebug>
00035 #include <KTemporaryFile>
00036
00037 #include <map>
00038 #include <vector>
00039
00040 #include <strigi/strigiconfig.h>
00041 #include <strigi/indexwriter.h>
00042 #include <strigi/indexmanager.h>
00043 #include <strigi/indexreader.h>
00044 #include <strigi/analysisresult.h>
00045 #include <strigi/fileinputstream.h>
00046 #include <strigi/analyzerconfiguration.h>
00047
00048
00049
00050
00051 class StoppableConfiguration : public Strigi::AnalyzerConfiguration {
00052 public:
00053 StoppableConfiguration()
00054 : m_stop(false) {
00055 #if defined(STRIGI_IS_VERSION)
00056 #if STRIGI_IS_VERSION( 0, 6, 1 )
00057 setIndexArchiveContents( false );
00058 #endif
00059 #endif
00060 }
00061
00062 bool indexMore() const {
00063 return !m_stop;
00064 }
00065
00066 bool addMoreText() const {
00067 return !m_stop;
00068 }
00069
00070 void setStop( bool s ) {
00071 m_stop = s;
00072 }
00073
00074 private:
00075 bool m_stop;
00076 };
00077
00078
namespace {
    /**
     * Bit flags stored next to each folder in the update queue.
     */
    enum UpdateDirFlags {
        /// No special handling; used for folders queued via updateDir( path ).
        NoUpdateFlags = 0,

        /// The folder is updated recursively (run() forwards this bit as the
        /// "recursive" argument of updateDir()).
        UpdateRecursive = 1 << 0,

        /// The entry was queued from the configured folder list; updateAll()
        /// removes entries carrying this bit before re-queueing that list.
        AutoUpdateFolder = 1 << 1
    };
}
00098
00099
00100 Nepomuk::IndexScheduler::IndexScheduler( Strigi::IndexManager* manager, QObject* parent )
00101 : QThread( parent ),
00102 m_suspended( false ),
00103 m_stopped( false ),
00104 m_indexing( false ),
00105 m_indexManager( manager )
00106 {
00107 m_analyzerConfig = new StoppableConfiguration;
00108
00109 connect( Config::self(), SIGNAL( configChanged() ),
00110 this, SLOT( readConfig() ) );
00111 }
00112
00113
00114 Nepomuk::IndexScheduler::~IndexScheduler()
00115 {
00116 delete m_analyzerConfig;
00117 }
00118
00119
00120 void Nepomuk::IndexScheduler::suspend()
00121 {
00122 if ( isRunning() ) {
00123 QMutexLocker locker( &m_resumeStopMutex );
00124 m_suspended = true;
00125 }
00126 }
00127
00128
00129 void Nepomuk::IndexScheduler::resume()
00130 {
00131 if ( isRunning() ) {
00132 QMutexLocker locker( &m_resumeStopMutex );
00133 m_suspended = false;
00134 m_resumeStopWc.wakeAll();
00135 }
00136 }
00137
00138
00139 void Nepomuk::IndexScheduler::setSuspended( bool suspended )
00140 {
00141 if ( suspended )
00142 suspend();
00143 else
00144 resume();
00145 }
00146
00147
00148 void Nepomuk::IndexScheduler::stop()
00149 {
00150 if ( isRunning() ) {
00151 QMutexLocker locker( &m_resumeStopMutex );
00152 m_stopped = true;
00153 m_suspended = false;
00154 m_analyzerConfig->setStop( true );
00155 m_dirsToUpdateWc.wakeAll();
00156 m_resumeStopWc.wakeAll();
00157 }
00158 }
00159
00160
00161 bool Nepomuk::IndexScheduler::isSuspended() const
00162 {
00163 return isRunning() && m_suspended;
00164 }
00165
00166
00167 bool Nepomuk::IndexScheduler::isIndexing() const
00168 {
00169 return m_indexing;
00170 }
00171
00172
00173 QString Nepomuk::IndexScheduler::currentFolder() const
00174 {
00175 return m_currentFolder;
00176 }
00177
00178
00179 void Nepomuk::IndexScheduler::setIndexingStarted( bool started )
00180 {
00181 if ( started != m_indexing ) {
00182 m_indexing = started;
00183 if ( m_indexing )
00184 emit indexingStarted();
00185 else
00186 emit indexingStopped();
00187 }
00188 }
00189
00190
00191 void Nepomuk::IndexScheduler::run()
00192 {
00193
00194 setPriority( QThread::IdlePriority );
00195
00196
00197 m_suspended = false;
00198 m_stopped = false;
00199 m_analyzerConfig->setStop( false );
00200 readConfig();
00201
00202 Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
00203 analyzer.setIndexWriter( *m_indexManager->indexWriter() );
00204
00205 setIndexingStarted( true );
00206
00207
00208 m_dirsToUpdate.clear();
00209 foreach( const QString& f, Config::self()->folders() )
00210 m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
00211
00212 while ( 1 ) {
00213
00214
00215 if ( m_dirsToUpdate.isEmpty() ) {
00216 setIndexingStarted( false );
00217
00218 m_dirsToUpdateMutex.lock();
00219 m_dirsToUpdateWc.wait( &m_dirsToUpdateMutex );
00220 m_dirsToUpdateMutex.unlock();
00221
00222 if ( !m_stopped )
00223 setIndexingStarted( true );
00224 }
00225
00226
00227 if ( !waitForContinue() ) {
00228 break;
00229 }
00230
00231
00232 m_dirsToUpdateMutex.lock();
00233 QPair<QString, int> dir = *m_dirsToUpdate.begin();
00234 m_dirsToUpdate.erase( m_dirsToUpdate.begin() );
00235 m_dirsToUpdateMutex.unlock();
00236
00237
00238 if ( !updateDir( dir.first, &analyzer, dir.second & UpdateRecursive ) ) {
00239 break;
00240 }
00241 m_currentFolder.clear();
00242 }
00243
00244 setIndexingStarted( false );
00245 }
00246
00247
00248
00249 bool Nepomuk::IndexScheduler::updateDir( const QString& dir, Strigi::StreamAnalyzer* analyzer, bool recursive )
00250 {
00251
00252
00253
00254 emit indexingFolder( dir );
00255
00256 m_currentFolder = dir;
00257
00258
00259 std::map<std::string, time_t> filesInStore;
00260 m_indexManager->indexReader()->getChildren( QFile::encodeName( dir ).data(), filesInStore );
00261 std::map<std::string, time_t>::const_iterator filesInStoreEnd = filesInStore.end();
00262
00263 QList<QFileInfo> filesToIndex;
00264 QList<QString> subFolders;
00265 std::vector<std::string> filesToDelete;
00266
00267
00268
00269 QDirIterator dirIt( dir, QDir::NoDotAndDotDot|QDir::Readable|QDir::Files|QDir::Dirs );
00270 while ( dirIt.hasNext() ) {
00271 QString path = dirIt.next();
00272
00273 QFileInfo fileInfo = dirIt.fileInfo();
00274
00275 bool indexFile = m_analyzerConfig->indexFile( QFile::encodeName( path ), QFile::encodeName( fileInfo.fileName() ) );
00276
00277
00278 std::map<std::string, time_t>::iterator filesInStoreIt = filesInStore.find( QFile::encodeName( path ).data() );
00279 bool newFile = ( filesInStoreIt == filesInStoreEnd );
00280
00281
00282 bool fileChanged = !newFile && fileInfo.lastModified().toTime_t() != filesInStoreIt->second;
00283
00284 if ( indexFile && ( newFile || fileChanged ) )
00285 filesToIndex << fileInfo;
00286
00287 if ( !newFile && ( fileChanged || !indexFile ) )
00288 filesToDelete.push_back( filesInStoreIt->first );
00289
00290
00291 if ( !newFile )
00292 filesInStore.erase( filesInStoreIt );
00293
00294 if ( indexFile && recursive && fileInfo.isDir() && !fileInfo.isSymLink() )
00295 subFolders << path;
00296 }
00297
00298
00299
00300 for ( std::map<std::string, time_t>::const_iterator it = filesInStore.begin();
00301 it != filesInStoreEnd; ++it ) {
00302 filesToDelete.push_back( it->first );
00303 }
00304
00305
00306 m_indexManager->indexWriter()->deleteEntries( filesToDelete );
00307
00308
00309 foreach( const QFileInfo& file, filesToIndex ) {
00310
00311
00312 if ( !waitForContinue() )
00313 return false;
00314
00315 analyzeFile( file, analyzer );
00316 }
00317
00318
00319
00320 if ( recursive ) {
00321 foreach( const QString& folder, subFolders ) {
00322 if ( !Config::self()->excludeFolders().contains( folder ) &&
00323 !updateDir( folder, analyzer, true ) )
00324 return false;
00325 }
00326 }
00327
00328 return true;
00329 }
00330
00331
00332 void Nepomuk::IndexScheduler::analyzeFile( const QFileInfo& file, Strigi::StreamAnalyzer* analyzer )
00333 {
00334
00335
00336 Strigi::AnalysisResult analysisresult( QFile::encodeName( file.filePath() ).data(),
00337 file.lastModified().toTime_t(),
00338 *m_indexManager->indexWriter(),
00339 *analyzer,
00340 QFile::encodeName( file.path() ).data() );
00341 if ( file.isFile() && !file.isSymLink() ) {
00342 Strigi::FileInputStream stream( QFile::encodeName( file.filePath() ) );
00343 analysisresult.index( &stream );
00344 }
00345 else {
00346 analysisresult.index(0);
00347 }
00348 }
00349
00350
00351 bool Nepomuk::IndexScheduler::waitForContinue()
00352 {
00353 QMutexLocker locker( &m_resumeStopMutex );
00354 if ( m_suspended ) {
00355 setIndexingStarted( false );
00356 m_resumeStopWc.wait( &m_resumeStopMutex );
00357 setIndexingStarted( true );
00358 }
00359
00360 return !m_stopped;
00361 }
00362
00363
00364 void Nepomuk::IndexScheduler::updateDir( const QString& path )
00365 {
00366 QMutexLocker lock( &m_dirsToUpdateMutex );
00367 m_dirsToUpdate << qMakePair( path, ( int )NoUpdateFlags );
00368 m_dirsToUpdateWc.wakeAll();
00369 }
00370
00371
00372 void Nepomuk::IndexScheduler::updateAll()
00373 {
00374 QMutexLocker lock( &m_dirsToUpdateMutex );
00375
00376
00377
00378 QSet<QPair<QString, int> >::iterator it = m_dirsToUpdate.begin();
00379 while ( it != m_dirsToUpdate.end() ) {
00380 if ( it->second & AutoUpdateFolder )
00381 it = m_dirsToUpdate.erase( it );
00382 else
00383 ++it;
00384 }
00385
00386
00387 foreach( const QString& f, Config::self()->folders() )
00388 m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
00389
00390 m_dirsToUpdateWc.wakeAll();
00391 }
00392
00393
00394 void Nepomuk::IndexScheduler::readConfig()
00395 {
00396
00397 std::vector<std::pair<bool, std::string> > filters;
00398 QStringList excludeFilters = Config::self()->excludeFilters();
00399 QStringList includeFilters = Config::self()->includeFilters();
00400 foreach( const QString& filter, excludeFilters ) {
00401 filters.push_back( std::make_pair<bool, std::string>( false, filter.toUtf8().data() ) );
00402 }
00403 foreach( const QString& filter, includeFilters ) {
00404 filters.push_back( std::make_pair<bool, std::string>( true, filter.toUtf8().data() ) );
00405 }
00406 m_analyzerConfig->setFilters(filters);
00407 updateAll();
00408 }
00409
00410
00411 namespace {
00412 class QDataStreamStrigiBufferedStream : public Strigi::BufferedStream<char>
00413 {
00414 public:
00415 QDataStreamStrigiBufferedStream( QDataStream& stream )
00416 : m_stream( stream ) {
00417 }
00418
00419 int32_t fillBuffer( char* start, int32_t space ) {
00420 int r = m_stream.readRawData( start, space );
00421 if ( r == 0 ) {
00422
00423 return -1;
00424 }
00425 else if ( r < 0 ) {
00426
00427 m_status = Strigi::Error;
00428 return -1;
00429 }
00430 else {
00431 return r;
00432 }
00433 }
00434
00435 private:
00436 QDataStream& m_stream;
00437 };
00438 }
00439
00440
00441 void Nepomuk::IndexScheduler::analyzeResource( const QUrl& uri, const QDateTime& modificationTime, QDataStream& data )
00442 {
00443 QDateTime existingMTime = QDateTime::fromTime_t( m_indexManager->indexReader()->mTime( uri.toEncoded().data() ) );
00444 if ( existingMTime < modificationTime ) {
00445
00446 std::vector<std::string> entries;
00447 entries.push_back( uri.toEncoded().data() );
00448 m_indexManager->indexWriter()->deleteEntries( entries );
00449
00450
00451 Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
00452 analyzer.setIndexWriter( *m_indexManager->indexWriter() );
00453 Strigi::AnalysisResult analysisresult( uri.toEncoded().data(),
00454 modificationTime.toTime_t(),
00455 *m_indexManager->indexWriter(),
00456 analyzer );
00457 QDataStreamStrigiBufferedStream stream( data );
00458 analysisresult.index( &stream );
00459 }
00460 else {
00461 kDebug() << uri << "up to date";
00462 }
00463 }
00464
00465 #include "indexscheduler.moc"