KDECore
ksycocadict.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "ksycocadict.h"
00020 #include "ksycocaentry.h"
00021 #include "ksycoca.h"
00022 #include "kdebug.h"
00023
00024 #include <QtCore/QBitArray>
00025
00026 namespace
00027 {
00028 struct string_entry {
00029 string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00030 : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00031 {}
00032 uint hash;
00033 const int length;
00034 const QString keyStr;
00035 const QChar * const key;
00036 const KSycocaEntry::Ptr payload;
00037 };
00038 }
00039
00040 class KSycocaDictStringList : public QList<string_entry*>
00041 {
00042 public:
00043 ~KSycocaDictStringList() {
00044 qDeleteAll(*this);
00045 }
00046 };
00047
00048 class KSycocaDict::Private
00049 {
00050 public:
00051 Private()
00052 : stringlist( 0 ),
00053 stream( 0 ),
00054 offset( 0 )
00055 {
00056 }
00057
00058 ~Private()
00059 {
00060 delete stringlist;
00061 }
00062
00063
00064 qint32 offsetForKey(const QString& key) const;
00065
00066
00067 quint32 hashKey(const QString & key) const;
00068
00069 KSycocaDictStringList *stringlist;
00070 QDataStream *stream;
00071 qint32 offset;
00072 quint32 hashTableSize;
00073 QList<qint32> hashList;
00074 };
00075
00076 KSycocaDict::KSycocaDict()
00077 : d( new Private )
00078 {
00079 }
00080
00081 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00082 : d( new Private )
00083 {
00084 d->stream = str;
00085 d->offset = offset;
00086
00087 quint32 test1, test2;
00088 str->device()->seek(offset);
00089 (*str) >> test1 >> test2;
00090 if ((test1 > 0x000fffff) || (test2 > 1024))
00091 {
00092 KSycoca::flagError();
00093 d->hashTableSize = 0;
00094 d->offset = 0;
00095 return;
00096 }
00097
00098 str->device()->seek(offset);
00099 (*str) >> d->hashTableSize;
00100 (*str) >> d->hashList;
00101 d->offset = str->device()->pos();
00102 }
00103
00104 KSycocaDict::~KSycocaDict()
00105 {
00106 delete d;
00107 }
00108
00109 void
00110 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00111 {
00112 if (key.isEmpty()) return;
00113 if (!payload) return;
00114 if (!d->stringlist)
00115 {
00116 d->stringlist = new KSycocaDictStringList;
00117 }
00118
00119 string_entry *entry = new string_entry(key, payload);
00120 d->stringlist->append(entry);
00121 }
00122
00123 void
00124 KSycocaDict::remove(const QString &key)
00125 {
00126 if ( !d || !d->stringlist )
00127 return;
00128
00129 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00130 string_entry* entry = *it;
00131 if (entry->keyStr == key) {
00132 d->stringlist->erase(it);
00133 delete entry;
00134 break;
00135 }
00136 }
00137 }
00138
00139 int KSycocaDict::find_string(const QString &key ) const
00140 {
00141 Q_ASSERT(d);
00142
00143
00144 qint32 offset = d->offsetForKey(key);
00145
00146
00147 if (offset == 0)
00148 return 0;
00149
00150 if (offset > 0)
00151 return offset;
00152
00153
00154 offset = -offset;
00155
00156 d->stream->device()->seek(offset);
00157
00158
00159 while(true)
00160 {
00161 (*d->stream) >> offset;
00162 if (offset == 0) break;
00163 QString dupkey;
00164 (*d->stream) >> dupkey;
00165
00166 if (dupkey == key) return offset;
00167 }
00168
00169
00170 return 0;
00171 }
00172
00173
00174 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00175 {
00176 qint32 offset = d->offsetForKey(key);
00177 QList<int> offsetList;
00178 if (offset == 0)
00179 return offsetList;
00180
00181 if (offset > 0) {
00182 offsetList.append(offset);
00183 return offsetList;
00184 }
00185
00186
00187 offset = -offset;
00188
00189 d->stream->device()->seek(offset);
00190
00191
00192 while(true)
00193 {
00194 (*d->stream) >> offset;
00195 if (offset == 0) break;
00196 QString dupkey;
00197 (*d->stream) >> dupkey;
00198
00199 if (dupkey == key)
00200 offsetList.append(offset);
00201 }
00202 return offsetList;
00203 }
00204
00205 uint KSycocaDict::count()
00206 {
00207 if ( !d || !d->stringlist ) return 0;
00208
00209 return d->stringlist->count();
00210 }
00211
00212 void
00213 KSycocaDict::clear()
00214 {
00215 delete d;
00216 d = 0;
00217 }
00218
00219 uint KSycocaDict::Private::hashKey( const QString &key) const
00220 {
00221 int l = key.length();
00222 register uint h = 0;
00223
00224 for(int i = 0; i < hashList.count(); i++)
00225 {
00226 int pos = hashList[i];
00227 if (pos < 0)
00228 {
00229 pos = -pos-1;
00230 if (pos < l)
00231 h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00232 }
00233 else
00234 {
00235 pos = pos-1;
00236 if (pos < l)
00237 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00238 }
00239 }
00240 return h;
00241 }
00242
00243
00244
00245 static int
00246 calcDiversity(KSycocaDictStringList *stringlist, int pos, int sz)
00247 {
00248 if (pos == 0) return 0;
00249 QBitArray matrix(sz);
00250 uint usz = sz;
00251
00252 if (pos < 0)
00253 {
00254 pos = -pos-1;
00255 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00256 {
00257 string_entry* entry = *it;
00258 register int l = entry->length;
00259 if (pos < l && pos != 0)
00260 {
00261 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00262 matrix.setBit( hash % usz, true );
00263 }
00264 }
00265 }
00266 else
00267 {
00268 pos = pos-1;
00269 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00270 {
00271 string_entry* entry = *it;
00272 if (pos < entry->length)
00273 {
00274 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00275 matrix.setBit( hash % usz, true );
00276 }
00277 }
00278 }
00279 int diversity = 0;
00280 for(int i=0;i< sz;i++)
00281 if (matrix.testBit(i)) diversity++;
00282
00283 return diversity;
00284 }
00285
00286
00287
00288 static void
00289 addDiversity(KSycocaDictStringList *stringlist, int pos)
00290 {
00291 if (pos == 0) return;
00292 if (pos < 0)
00293 {
00294 pos = -pos-1;
00295 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00296 {
00297 string_entry* entry = *it;
00298 register int l = entry->length;
00299 if (pos < l)
00300 entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00301 }
00302 }
00303 else
00304 {
00305 pos = pos - 1;
00306 for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00307 {
00308 string_entry* entry = *it;
00309 if (pos < entry->length)
00310 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00311 }
00312 }
00313 }
00314
00315
00316 void
00317 KSycocaDict::save(QDataStream &str)
00318 {
00319 if (count() == 0)
00320 {
00321 d->hashTableSize = 0;
00322 d->hashList.clear();
00323 str << d->hashTableSize;
00324 str << d->hashList;
00325 return;
00326 }
00327
00328 d->offset = str.device()->pos();
00329
00330
00331
00332
00333
00334 int maxLength = 0;
00335
00336 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00337 {
00338 string_entry* entry = *it;
00339 entry->hash = 0;
00340 if (entry->length > maxLength)
00341 maxLength = entry->length;
00342 }
00343
00344
00345
00346
00347
00348
00349 register unsigned int sz = count()*4 + 1;
00350 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00351 sz+=2;
00352
00353 int maxDiv = 0;
00354 int maxPos = 0;
00355 int lastDiv = 0;
00356
00357 d->hashList.clear();
00358
00359
00360
00361 int *oldvec=new int[maxLength*2+1];
00362 for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
00363 int mindiv=0;
00364
00365 while(true)
00366 {
00367 int divsum=0,divnum=0;
00368
00369 maxDiv = 0;
00370 maxPos = 0;
00371 for(int pos=-maxLength; pos <= maxLength; pos++)
00372 {
00373
00374 if (oldvec[pos+maxLength]<mindiv)
00375 { oldvec[pos+maxLength]=0; continue; }
00376
00377 int diversity = calcDiversity(d->stringlist, pos, sz);
00378 if (diversity > maxDiv)
00379 {
00380 maxDiv = diversity;
00381 maxPos = pos;
00382 }
00383 oldvec[pos+maxLength]=diversity;
00384 divsum+=diversity; divnum++;
00385 }
00386
00387 if (divnum)
00388 mindiv=(3*divsum)/(4*divnum);
00389
00390 if (maxDiv <= lastDiv)
00391 break;
00392
00393 lastDiv = maxDiv;
00394 addDiversity(d->stringlist, maxPos);
00395 d->hashList.append(maxPos);
00396 }
00397
00398 delete [] oldvec;
00399
00400 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00401 (*it)->hash = d->hashKey((*it)->keyStr);
00402
00403
00404 d->hashTableSize = sz;
00405
00406 struct hashtable_entry {
00407 string_entry *entry;
00408 QList<string_entry*>* duplicates;
00409 int duplicate_offset;
00410 };
00411
00412 hashtable_entry *hashTable = new hashtable_entry[ sz ];
00413
00414
00415 for (unsigned int i=0; i < sz; i++)
00416 {
00417 hashTable[i].entry = 0;
00418 hashTable[i].duplicates = 0;
00419 }
00420
00421
00422 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00423 {
00424 string_entry* entry = *it;
00425 int hash = entry->hash % sz;
00426 if (!hashTable[hash].entry)
00427 {
00428 hashTable[hash].entry = entry;
00429 }
00430 else
00431 {
00432 if (!hashTable[hash].duplicates)
00433 {
00434 hashTable[hash].duplicates = new QList<string_entry*>;
00435 hashTable[hash].duplicates->append(hashTable[hash].entry);
00436 hashTable[hash].duplicate_offset = 0;
00437 }
00438 hashTable[hash].duplicates->append(entry);
00439 }
00440 }
00441
00442 str << d->hashTableSize;
00443 str << d->hashList;
00444
00445 d->offset = str.device()->pos();
00446
00447
00448
00449
00450
00451 for(int pass = 1; pass <= 2; pass++)
00452 {
00453 str.device()->seek(d->offset);
00454
00455 for(uint i=0; i < d->hashTableSize; i++)
00456 {
00457 qint32 tmpid;
00458 if (!hashTable[i].entry)
00459 tmpid = (qint32) 0;
00460 else if (!hashTable[i].duplicates)
00461 tmpid = (qint32) hashTable[i].entry->payload->offset();
00462 else
00463 tmpid = (qint32) -hashTable[i].duplicate_offset;
00464 str << tmpid;
00465
00466 }
00467
00468
00469
00470 for(uint i=0; i < d->hashTableSize; i++)
00471 {
00472 const QList<string_entry*> *dups = hashTable[i].duplicates;
00473 if (dups)
00474 {
00475 hashTable[i].duplicate_offset = str.device()->pos();
00476
00477
00478
00479 for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00480 {
00481 const qint32 offset = (*dup)->payload->offset();
00482
00483 Q_ASSERT_X( offset, "KSycocaDict::save",
00484 QByteArray("entry offset is 0, save() was not called on ")
00485 + (*dup)->payload->name().toLatin1() );
00486 str << offset ;
00487 str << (*dup)->keyStr;
00488 }
00489 str << (qint32) 0;
00490 }
00491 }
00492
00493 }
00494
00495
00496 for(uint i=0; i < d->hashTableSize; i++)
00497 {
00498 delete hashTable[i].duplicates;
00499 }
00500 delete [] hashTable;
00501 }
00502
00503 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00504 {
00505 if ( !stream || !offset )
00506 {
00507 kError() << "No ksycoca4 database available!" << endl;
00508 return 0;
00509 }
00510
00511 if (hashTableSize == 0)
00512 return 0;
00513
00514
00515 const uint hash = hashKey(key) % hashTableSize;
00516
00517
00518 const uint off = offset+sizeof(qint32)*hash;
00519
00520 stream->device()->seek( off );
00521
00522 qint32 retOffset;
00523 (*stream) >> retOffset;
00524 return retOffset;
00525 }