00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "sopranoindexwriter.h"
00021 #include "util.h"
00022
00023 #include <Soprano/Soprano>
00024 #include <Soprano/Vocabulary/RDF>
00025 #include <Soprano/Vocabulary/Xesam>
00026 #include <Soprano/LiteralValue>
00027
00028 #include <QtCore/QList>
00029 #include <QtCore/QHash>
00030 #include <QtCore/QVariant>
00031 #include <QtCore/QFileInfo>
00032 #include <QtCore/QFile>
00033 #include <QtCore/QUrl>
00034 #include <QtCore/QDebug>
00035 #include <QtCore/QThread>
00036 #include <QtCore/QDateTime>
00037 #include <QtCore/QByteArray>
00038 #include <QtCore/QUuid>
00039
00040 #include <KUrl>
00041
00042 #include <sys/stat.h>
00043 #include <stdlib.h>
00044 #include <string.h>
00045 #include <errno.h>
00046
00047 #include <map>
00048 #include <sstream>
00049 #include <algorithm>
00050
00051
00052
00053
00054 using namespace Soprano;
00055
00056
00057 uint qHash( const std::string& s )
00058 {
00059 return qHash( s.c_str() );
00060 }
00061
00062 namespace {
00063 QString findArchivePath( const QString& path ) {
00064 QString p( path );
00065 int i = 0;
00066 while ( ( i = p.lastIndexOf( '/' ) ) > 0 ) {
00067 p.truncate( i );
00068 if ( QFileInfo( p ).isFile() ) {
00069 return p;
00070 }
00071 }
00072 return QString();
00073 }
00074
00075 QUrl createResourceUri( const Strigi::AnalysisResult* idx ) {
00076
00077
00078
00079
00080
00081 QUrl uri;
00082 QString path = QFile::decodeName( idx->path().c_str() );
00083 if ( KUrl::isRelativeUrl( path ) )
00084 uri = QUrl::fromLocalFile( QFileInfo( path ).absoluteFilePath() );
00085 else
00086 uri = KUrl( path );
00087
00088 if ( idx->depth() > 0 ) {
00089 QString archivePath = findArchivePath( path );
00090 if ( QFile::exists( archivePath ) ) {
00091 if ( archivePath.endsWith( QLatin1String( ".tar" ) ) ||
00092 archivePath.endsWith( QLatin1String( ".tar.gz" ) ) ||
00093 archivePath.endsWith( QLatin1String( ".tar.bz2" ) ) ) {
00094 uri.setScheme( "tar" );
00095 }
00096 else if ( archivePath.endsWith( QLatin1String( ".zip" ) ) ) {
00097 uri.setScheme( "zip" );
00098 }
00099 }
00100 }
00101
00102
00103 if ( uri.scheme().isEmpty() ) {
00104 uri.setScheme( "file" );
00105 }
00106
00107 return uri;
00108 }
00109
00110 QUrl createGraphUri() {
00111 return QUrl( "urn:nepomuk:local:" + QUuid::createUuid().toString().remove( QRegExp( "[\\{\\}]" ) ) );
00112 }
00113
00114 class FileMetaData
00115 {
00116 public:
00117
00118 QUrl fileUri;
00119 QUrl context;
00120 std::string content;
00121 };
00122
00123 class RegisteredFieldData
00124 {
00125 public:
00126 RegisteredFieldData( const QUrl& prop, QVariant::Type t )
00127 : property( prop ),
00128 dataType( t ),
00129 isRdfType( prop == Vocabulary::RDF::type() ) {
00130 }
00131
00132 QUrl property;
00133 QVariant::Type dataType;
00134 bool isRdfType;
00135 };
00136 }
00137
00138
00139 class Strigi::Soprano::IndexWriter::Private
00140 {
00141 public:
00142 Private()
00143 : indexTransactionID( 0 ) {
00144 literalTypes[FieldRegister::stringType] = QVariant::String;
00145 literalTypes[FieldRegister::floatType] = QVariant::Double;
00146 literalTypes[FieldRegister::integerType] = QVariant::Int;
00147 literalTypes[FieldRegister::binaryType] = QVariant::ByteArray;
00148 literalTypes[FieldRegister::datetimeType] = QVariant::DateTime;
00149 }
00150
00151 QVariant::Type literalType( const Strigi::FieldProperties& strigiType ) {
00152
00153 QHash<std::string, QVariant::Type>::const_iterator it = literalTypes.constFind( strigiType.typeUri() );
00154 if ( it == literalTypes.constEnd() ) {
00155 return LiteralValue::typeFromDataTypeUri( QUrl::fromEncoded( strigiType.typeUri().c_str() ) );
00156 }
00157 else {
00158 return *it;
00159 }
00160 }
00161
00162 LiteralValue createLiteralValue( QVariant::Type type,
00163 const unsigned char* data,
00164 uint32_t size ) {
00165 QString value = QString::fromUtf8( ( const char* )data, size );
00166 if ( type == QVariant::DateTime ) {
00167 return LiteralValue( QDateTime::fromTime_t( value.toUInt() ) );
00168 }
00169 else if ( type != QVariant::Invalid ) {
00170 return LiteralValue::fromString( value, type );
00171 }
00172 else {
00173
00174 return LiteralValue( value );
00175 }
00176 }
00177
00178 ::Soprano::Model* repository;
00179 int indexTransactionID;
00180
00181 private:
00182 QHash<std::string, QVariant::Type> literalTypes;
00183 };
00184
00185
00186 Strigi::Soprano::IndexWriter::IndexWriter( ::Soprano::Model* model )
00187 : Strigi::IndexWriter()
00188 {
00189
00190 d = new Private;
00191 d->repository = model;
00192 Util::storeStrigiMiniOntology( d->repository );
00193
00194 }
00195
00196
00197 Strigi::Soprano::IndexWriter::~IndexWriter()
00198 {
00199 delete d;
00200 }
00201
00202
00203 void Strigi::Soprano::IndexWriter::commit()
00204 {
00205 }
00206
00207
00208
00209 void Strigi::Soprano::IndexWriter::deleteEntries( const std::vector<std::string>& entries )
00210 {
00211
00212
00213 QString systemLocationUri = Util::fieldUri( FieldRegister::pathFieldName ).toString();
00214 for ( unsigned int i = 0; i < entries.size(); ++i ) {
00215 QString path = QString::fromUtf8( entries[i].c_str() );
00216 QString query = QString( "select ?g ?mg where { "
00217 "{ { ?r <%1> \"%2\"^^<%3> . } UNION { ?r <%1> %6 . } } . "
00218 "?g <%4> ?r . "
00219 "OPTIONAL { ?mg <%5> ?g . } }" )
00220 .arg( systemLocationUri )
00221 .arg( path )
00222 .arg( Vocabulary::XMLSchema::string().toString() )
00223 .arg( Strigi::Ontology::indexGraphFor().toString() )
00224 .arg( Vocabulary::NRL::coreGraphMetadataFor().toString() )
00225 .arg( Node( QUrl::fromLocalFile( path ) ).toN3() );
00226
00227 qDebug() << "deleteEntries query:" << query;
00228
00229 QueryResultIterator result = d->repository->executeQuery( query, ::Soprano::Query::QueryLanguageSparql );
00230 if ( result.next() ) {
00231 Node indexGraph = result.binding( "g" );
00232 Node metaDataGraph = result.binding( "mg" );
00233
00234 result.close();
00235
00236
00237 d->repository->removeContext( indexGraph );
00238
00239
00240 if ( metaDataGraph.isValid() )
00241 d->repository->removeContext( metaDataGraph );
00242 else
00243 d->repository->removeAllStatements( Statement( indexGraph, Node(), Node() ) );
00244 }
00245 }
00246 }
00247
00248
00249 void Strigi::Soprano::IndexWriter::deleteAllEntries()
00250 {
00251
00252
00253
00254 QString query = QString( "select ?g where { ?g <%1> ?r . }" ).arg( Strigi::Ontology::indexGraphFor().toString() );
00255
00256 qDebug() << "deleteAllEntries query:" << query;
00257
00258 QueryResultIterator result = d->repository->executeQuery( query, ::Soprano::Query::QUERY_LANGUAGE_SPARQL );
00259 QList<Node> allIndexGraphs = result.iterateBindings( "g" ).allNodes();
00260 for ( QList<Node>::const_iterator it = allIndexGraphs.constBegin(); it != allIndexGraphs.constEnd(); ++it ) {
00261 Node indexGraph = *it;
00262
00263 qDebug() << "Found indexGraph to delete:" << indexGraph;
00264
00265
00266 d->repository->removeContext( indexGraph );
00267
00268
00269 d->repository->removeAllStatements( Statement( indexGraph, Node(), Node() ) );
00270 }
00271 }
00272
00273
00274
00275 void Strigi::Soprano::IndexWriter::startAnalysis( const AnalysisResult* idx )
00276 {
00277 if ( idx->depth() > 0 ) {
00278 return;
00279 }
00280
00281
00282 FileMetaData* data = new FileMetaData();
00283 data->fileUri = createResourceUri( idx );
00284
00285
00286 StatementIterator it = d->repository->listStatements( Node(),
00287 Strigi::Ontology::indexGraphFor(),
00288 data->fileUri );
00289 if ( it.next() ) {
00290 data->context = it.current().subject().uri();
00291 }
00292 else {
00293 data->context = createGraphUri();
00294 }
00295
00296
00297
00298 idx->setWriterData( data );
00299 }
00300
00301
00302 void Strigi::Soprano::IndexWriter::addText( const AnalysisResult* idx, const char* text, int32_t length )
00303 {
00304 if ( idx->depth() > 0 ) {
00305 return;
00306 }
00307
00308 FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
00309 md->content.append( text, length );
00310 }
00311
00312
00313 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult* idx,
00314 const RegisteredField* field,
00315 const std::string& value )
00316 {
00317 if ( idx->depth() > 0 ) {
00318 return;
00319 }
00320
00321
00322 if ( value.length() > 0 ) {
00323 FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
00324 RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( field->writerData() );
00325
00326
00327
00328 if ( rfd->isRdfType ) {
00329 d->repository->addStatement( md->fileUri,
00330 ::Soprano::Vocabulary::RDF::type(),
00331 QUrl::fromEncoded( value.c_str(), QUrl::StrictMode ),
00332 md->context );
00333 }
00334 else {
00335
00336
00337 if ( field->key() == FieldRegister::pathFieldName ||
00338 field->key() == FieldRegister::parentLocationFieldName ) {
00339 d->repository->addStatement( md->fileUri,
00340 rfd->property,
00341 QUrl::fromLocalFile( QFile::decodeName( QByteArray::fromRawData( value.c_str(), value.length() ) ) ),
00342 md->context );
00343 }
00344 else {
00345 d->repository->addStatement( Statement( md->fileUri,
00346 rfd->property,
00347 d->createLiteralValue( rfd->dataType, ( unsigned char* )value.c_str(), value.length() ),
00348 md->context) );
00349 }
00350 }
00351 if ( d->repository->lastError() )
00352 qDebug() << "Failed to add value" << value.c_str();
00353 }
00354
00355 }
00356
00357
00358
00359 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult* idx,
00360 const RegisteredField* field,
00361 const unsigned char* data,
00362 uint32_t size )
00363 {
00364 addValue( idx, field, std::string( ( const char* )data, size ) );
00365 }
00366
00367
00368 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult*, const RegisteredField*,
00369 const std::string&, const std::string& )
00370 {
00371
00372 }
00373
00374
00375 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult* idx,
00376 const RegisteredField* field,
00377 uint32_t value )
00378 {
00379 if ( idx->depth() > 0 ) {
00380 return;
00381 }
00382
00383
00384 FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
00385 RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( field->writerData() );
00386
00387 LiteralValue val( value );
00388 if ( field->type() == FieldRegister::datetimeType ) {
00389 val = QDateTime::fromTime_t( value );
00390 }
00391
00392 d->repository->addStatement( Statement( md->fileUri,
00393 rfd->property,
00394 val,
00395 md->context) );
00396
00397 }
00398
00399
00400 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult* idx,
00401 const RegisteredField* field,
00402 int32_t value )
00403 {
00404 if ( idx->depth() > 0 ) {
00405 return;
00406 }
00407
00408
00409 FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
00410 RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( field->writerData() );
00411
00412 d->repository->addStatement( Statement( md->fileUri,
00413 rfd->property,
00414 LiteralValue( value ),
00415 md->context) );
00416
00417 }
00418
00419
00420 void Strigi::Soprano::IndexWriter::addValue( const AnalysisResult* idx,
00421 const RegisteredField* field,
00422 double value )
00423 {
00424 if ( idx->depth() > 0 ) {
00425 return;
00426 }
00427
00428
00429 FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
00430 RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( field->writerData() );
00431
00432 d->repository->addStatement( Statement( md->fileUri,
00433 rfd->property,
00434 LiteralValue( value ),
00435 md->context) );
00436
00437 }
00438
00439
00440 void Strigi::Soprano::IndexWriter::addTriplet( const std::string& subject,
00441 const std::string& predicate, const std::string& object )
00442 {
00443
00444
00445
00446
00447 d->repository->addStatement( Statement( Node( QUrl( QString::fromUtf8( subject.c_str() ) ) ),
00448 Node( QUrl( QString::fromUtf8( predicate.c_str() ) ) ),
00449 Node( QUrl( QString::fromUtf8( object.c_str() ) ) ),
00450 Node() ) );
00451 }
00452
00453
00454
00455 void Strigi::Soprano::IndexWriter::finishAnalysis( const AnalysisResult* idx )
00456 {
00457 if ( idx->depth() > 0 ) {
00458 return;
00459 }
00460
00461
00462 FileMetaData* md = static_cast<FileMetaData*>( idx->writerData() );
00463
00464 if ( md->content.length() > 0 ) {
00465 d->repository->addStatement( Statement( md->fileUri,
00466 Vocabulary::Xesam::asText(),
00467 LiteralValue( QString::fromUtf8( md->content.c_str() ) ),
00468 md->context ) );
00469 if ( d->repository->lastError() )
00470 qDebug() << "Failed to add" << md->fileUri << "as text" << QString::fromUtf8( md->content.c_str() );
00471 }
00472
00473
00474
00475 if ( QFileInfo( QFile::decodeName( idx->path().c_str() ) ).isDir() )
00476 d->repository->addStatement( Statement( md->fileUri,
00477 Vocabulary::RDF::type(),
00478 Vocabulary::Xesam::Folder(),
00479 md->context ) );
00480 else
00481 d->repository->addStatement( Statement( md->fileUri,
00482 Vocabulary::RDF::type(),
00483 Vocabulary::Xesam::File(),
00484 md->context ) );
00485
00486
00487
00488
00489 QUrl metaDataContext = md->context.toString() + "-metadata";
00490 d->repository->addStatement( Statement( md->context,
00491 Vocabulary::RDF::type(),
00492 Vocabulary::NRL::InstanceBase(),
00493 metaDataContext ) );
00494 d->repository->addStatement( Statement( md->context,
00495 Vocabulary::NAO::created(),
00496 LiteralValue( QDateTime::currentDateTime() ),
00497 metaDataContext ) );
00498 d->repository->addStatement( Statement( md->context,
00499 Strigi::Ontology::indexGraphFor(),
00500 md->fileUri,
00501 metaDataContext ) );
00502 d->repository->addStatement( Statement( metaDataContext,
00503 Vocabulary::RDF::type(),
00504 Vocabulary::NRL::GraphMetadata(),
00505 metaDataContext ) );
00506 d->repository->addStatement( metaDataContext,
00507 Vocabulary::NRL::coreGraphMetadataFor(),
00508 md->context,
00509 metaDataContext );
00510
00511
00512 delete md;
00513 idx->setWriterData( 0 );
00514
00515
00516 }
00517
00518
00519 void Strigi::Soprano::IndexWriter::initWriterData( const Strigi::FieldRegister& f )
00520 {
00521 map<string, RegisteredField*>::const_iterator i;
00522 map<string, RegisteredField*>::const_iterator end = f.fields().end();
00523 for (i = f.fields().begin(); i != end; ++i) {
00524 QUrl prop = Util::fieldUri( i->second->key() );
00525 i->second->setWriterData( new RegisteredFieldData( prop,
00526 prop == Vocabulary::RDF::type()
00527 ? QVariant::Invalid
00528 : d->literalType( i->second->properties() ) ) );
00529 }
00530 }
00531
00532
00533 void Strigi::Soprano::IndexWriter::releaseWriterData( const Strigi::FieldRegister& f )
00534 {
00535 map<string, RegisteredField*>::const_iterator i;
00536 map<string, RegisteredField*>::const_iterator end = f.fields().end();
00537 for (i = f.fields().begin(); i != end; ++i) {
00538 delete static_cast<RegisteredFieldData*>( i->second->writerData() );
00539 i->second->setWriterData( 0 );
00540 }
00541 }