• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

kstringhandler.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002    Copyright (C) 1999 Ian Zepp (icszepp@islc.net)
00003    Copyright (C) 2006 by Dominic Battre <dominic@battre.de>
00004    Copyright (C) 2006 by Martin Pool <mbp@canonical.com>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019    Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include "kstringhandler.h"
00023 
00024 #include <stdlib.h>     // random()
00025 
00026 #include <kglobal.h>
00027 
00028 #include <QtCore/QRegExp>            // for the word ranges
00029 #include <QtCore/QCharRef>
00030 #include <QtCore/QMutableStringListIterator>
00031 
00032 
00033 
00034 //
00035 // Capitalization routines
00036 //
00037 QString KStringHandler::capwords( const QString &text )
00038 {
00039     if ( text.isEmpty() ) {
00040         return text;
00041     }
00042 
00043     const QString strippedText = text.trimmed();
00044     const QStringList words = capwords( strippedText.split(' '));
00045 
00046 
00047     QString result = text;
00048     result.replace( strippedText, words.join( " " ) );
00049     return result;
00050 }
00051 
00052 QStringList KStringHandler::capwords( const QStringList &list )
00053 {
00054     QStringList tmp = list;
00055     for ( QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it ) {
00056         *it = ( *it )[ 0 ].toUpper() + ( *it ).mid( 1 );
00057     }
00058     return tmp;
00059 }
00060 
00061 
00062 QString KStringHandler::lsqueeze( const QString & str, int maxlen )
00063 {
00064   if (str.length() > maxlen) {
00065     int part = maxlen-3;
00066     return QString("..." + str.right(part));
00067   }
00068   else return str;
00069 }
00070 
00071 QString KStringHandler::csqueeze( const QString & str, int maxlen )
00072 {
00073   if (str.length() > maxlen && maxlen > 3) {
00074     int part = (maxlen-3)/2;
00075     return QString(str.left(part) + "..." + str.right(part));
00076   }
00077   else return str;
00078 }
00079 
00080 QString KStringHandler::rsqueeze( const QString & str, int maxlen )
00081 {
00082   if (str.length() > maxlen) {
00083     int part = maxlen-3;
00084     return QString(str.left(part) + "...");
00085   }
00086   else return str;
00087 }
00088 
00089 QStringList KStringHandler::perlSplit(const QString & sep, const QString & s, int max)
00090 {
00091   bool ignoreMax = 0 == max;
00092 
00093   QStringList l;
00094 
00095   int searchStart = 0;
00096 
00097   int tokenStart = s.indexOf(sep, searchStart);
00098 
00099   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00100   {
00101     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00102       l << s.mid(searchStart, tokenStart - searchStart);
00103 
00104     searchStart = tokenStart + sep.length();
00105     tokenStart = s.indexOf(sep, searchStart);
00106   }
00107 
00108   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00109     l << s.mid(searchStart, s.length() - searchStart);
00110 
00111   return l;
00112 }
00113 
00114 QStringList KStringHandler::perlSplit(const QChar & sep, const QString & s, int max)
00115 {
00116   bool ignoreMax = 0 == max;
00117 
00118   QStringList l;
00119 
00120   int searchStart = 0;
00121 
00122   int tokenStart = s.indexOf(sep, searchStart);
00123 
00124   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00125   {
00126     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00127       l << s.mid(searchStart, tokenStart - searchStart);
00128 
00129     searchStart = tokenStart + 1;
00130     tokenStart = s.indexOf(sep, searchStart);
00131   }
00132 
00133   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00134     l << s.mid(searchStart, s.length() - searchStart);
00135 
00136   return l;
00137 }
00138 
00139 QStringList KStringHandler::perlSplit(const QRegExp & sep, const QString & s, int max)
00140 {
00141   bool ignoreMax = 0 == max;
00142 
00143   QStringList l;
00144 
00145   int searchStart = 0;
00146   int tokenStart = sep.indexIn(s, searchStart);
00147   int len = sep.matchedLength();
00148 
00149   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00150   {
00151     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00152       l << s.mid(searchStart, tokenStart - searchStart);
00153 
00154     searchStart = tokenStart + len;
00155     tokenStart = sep.indexIn(s, searchStart);
00156     len = sep.matchedLength();
00157   }
00158 
00159   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00160     l << s.mid(searchStart, s.length() - searchStart);
00161 
00162   return l;
00163 }
00164 
00165 QString KStringHandler::tagUrls( const QString& text )
00166 {
00167     /*static*/ QRegExp urlEx("(www\\.(?!\\.)|(fish|(f|ht)tp(|s))://)[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$]+[\\d\\w/]");
00168 
00169     QString richText( text );
00170     int urlPos = 0, urlLen;
00171     while ((urlPos = urlEx.indexIn(richText, urlPos)) >= 0)
00172     {
00173         urlLen = urlEx.matchedLength();
00174         QString href = richText.mid( urlPos, urlLen );
00175         // Qt doesn't support (?<=pattern) so we do it here
00176         if((urlPos > 0) && richText[urlPos-1].isLetterOrNumber()){
00177             urlPos++;
00178             continue;
00179         }
00180         // Don't use QString::arg since %01, %20, etc could be in the string
00181         QString anchor = "<a href=\"" + href + "\">" + href + "</a>";
00182         richText.replace( urlPos, urlLen, anchor );
00183 
00184 
00185         urlPos += anchor.length();
00186     }
00187     return richText;
00188 }
00189 
00190 QString KStringHandler::obscure( const QString &str )
00191 {
00192   QString result;
00193   const QChar *unicode = str.unicode();
00194   for ( int i = 0; i < str.length(); ++i )
00195     result += ( unicode[ i ].unicode() < 0x21 ) ? unicode[ i ] :
00196         QChar( 0x1001F - unicode[ i ].unicode() );
00197 
00198   return result;
00199 }
00200 
00201 
00202 bool KStringHandler::isUtf8( const char *buf )
00203 {
00204   int i, n;
00205   register unsigned char c;
00206   bool gotone = false;
00207 
00208   if (!buf)
00209     return true; // whatever, just don't crash
00210 
00211 #define F 0   /* character never appears in text */
00212 #define T 1   /* character appears in plain ASCII text */
00213 #define I 2   /* character appears in ISO-8859 text */
00214 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
00215 
00216   static const unsigned char text_chars[256] = {
00217         /*                  BEL BS HT LF    FF CR    */
00218         F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
00219         /*                              ESC          */
00220         F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
00221         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
00222         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
00223         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
00224         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
00225         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
00226         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
00227         /*            NEL                            */
00228         X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
00229         X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
00230         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
00231         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
00232         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
00233         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
00234         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
00235         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
00236   };
00237 
00238   /* *ulen = 0; */
00239   for (i = 0; (c = buf[i]); ++i) {
00240     if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
00241       /*
00242        * Even if the whole file is valid UTF-8 sequences,
00243        * still reject it if it uses weird control characters.
00244        */
00245 
00246       if (text_chars[c] != T)
00247         return false;
00248 
00249     } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
00250       return false;
00251     } else {                           /* 11xxxxxx begins UTF-8 */
00252       int following;
00253 
00254     if ((c & 0x20) == 0) {             /* 110xxxxx */
00255       following = 1;
00256     } else if ((c & 0x10) == 0) {      /* 1110xxxx */
00257       following = 2;
00258     } else if ((c & 0x08) == 0) {      /* 11110xxx */
00259       following = 3;
00260     } else if ((c & 0x04) == 0) {      /* 111110xx */
00261       following = 4;
00262     } else if ((c & 0x02) == 0) {      /* 1111110x */
00263       following = 5;
00264     } else
00265       return false;
00266 
00267       for (n = 0; n < following; ++n) {
00268         i++;
00269         if (!(c = buf[i]))
00270           goto done;
00271 
00272         if ((c & 0x80) == 0 || (c & 0x40))
00273           return false;
00274       }
00275       gotone = true;
00276     }
00277   }
00278 done:
00279   return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
00280 }
00281 
00282 #undef F
00283 #undef T
00284 #undef I
00285 #undef X
00286 
00287 QString KStringHandler::from8Bit( const char *str )
00288 {
00289   if (!str)
00290     return QString();
00291   if (!*str) {
00292     static const QString &emptyString = KGlobal::staticQString("");
00293     return emptyString;
00294   }
00295   return KStringHandler::isUtf8( str ) ?
00296              QString::fromUtf8( str ) :
00297              QString::fromLocal8Bit( str );
00298 }
00299 
00300 int KStringHandler::naturalCompare(const QString &_a, const QString &_b, Qt::CaseSensitivity caseSensitivity)
00301 {
00302     // This method chops the input a and b into pieces of
00303     // digits and non-digits (a1.05 becomes a | 1 | . | 05)
00304     // and compares these pieces of a and b to each other
00305     // (first with first, second with second, ...).
00306     //
00307     // This is based on the natural sort order code code by Martin Pool
00308     // http://sourcefrog.net/projects/natsort/
00309     // Martin Pool agreed to license this under LGPL or GPL.
00310 
00311     // FIXME: Using toLower() to implement case insensitive comparison is
00312     // sub-optimal, but is needed because we compare strings with
00313     // localeAwareCompare(), which does not know about case sensitivity.
00314     // A task has been filled for this in Qt Task Tracker with ID 205990.
00315     // http://trolltech.com/developer/task-tracker/index_html?method=entry&id=205990
00316     QString a;
00317     QString b;
00318     if (caseSensitivity == Qt::CaseSensitive) {
00319         a = _a;
00320         b = _b;
00321     } else {
00322         a = _a.toLower();
00323         b = _b.toLower();
00324     }
00325 
00326     const QChar* currA = a.unicode(); // iterator over a
00327     const QChar* currB = b.unicode(); // iterator over b
00328 
00329     if (currA == currB) {
00330         return 0;
00331     }
00332 
00333     const QChar* begSeqA = currA; // beginning of a new character sequence of a
00334     const QChar* begSeqB = currB;
00335 
00336     while (!currA->isNull() && !currB->isNull()) {
00337         if (currA->unicode() == QChar::ObjectReplacementCharacter) {
00338             return 1;
00339         }
00340 
00341         if (currB->unicode() == QChar::ObjectReplacementCharacter) {
00342             return -1;
00343         }
00344 
00345         if (currA->unicode() == QChar::ReplacementCharacter) {
00346             return 1;
00347         }
00348 
00349         if (currB->unicode() == QChar::ReplacementCharacter) {
00350             return -1;
00351         }
00352 
00353         // find sequence of characters ending at the first non-character
00354         while (!currA->isNull() && !currA->isDigit()) {
00355             ++currA;
00356         }
00357 
00358         while (!currB->isNull() && !currB->isDigit()) {
00359             ++currB;
00360         }
00361 
00362         // compare these sequences
00363         QString subA(begSeqA, currA - begSeqA);
00364         QString subB(begSeqB, currB - begSeqB);
00365         const int cmp = QString::localeAwareCompare(subA, subB);
00366         if (cmp != 0) {
00367             return cmp < 0 ? -1 : 1;
00368         }
00369 
00370         if (currA->isNull() || currB->isNull()) {
00371             break;
00372         }
00373 
00374         // now some digits follow...
00375         if ((*currA == '0') || (*currB == '0')) {
00376             // one digit-sequence starts with 0 -> assume we are in a fraction part
00377             // do left aligned comparison (numbers are considered left aligned)
00378             while (1) {
00379                 if (!currA->isDigit() && !currB->isDigit()) {
00380                     break;
00381                 } else if (!currA->isDigit()) {
00382                     return -1;
00383                 } else if (!currB->isDigit()) {
00384                     return + 1;
00385                 } else if (*currA < *currB) {
00386                     return -1;
00387                 } else if (*currA > *currB) {
00388                     return + 1;
00389                 }
00390                 ++currA;
00391                 ++currB;
00392             }
00393         } else {
00394             // No digit-sequence starts with 0 -> assume we are looking at some integer
00395             // do right aligned comparison.
00396             //
00397             // The longest run of digits wins. That aside, the greatest
00398             // value wins, but we can't know that it will until we've scanned
00399             // both numbers to know that they have the same magnitude.
00400 
00401             int weight = 0;
00402             while (1) {
00403                 if (!currA->isDigit() && !currB->isDigit()) {
00404                     if (weight != 0) {
00405                         return weight;
00406                     }
00407                     break;
00408                 } else if (!currA->isDigit()) {
00409                     return -1;
00410                 } else if (!currB->isDigit()) {
00411                     return + 1;
00412                 } else if ((*currA < *currB) && (weight == 0)) {
00413                     weight = -1;
00414                 } else if ((*currA > *currB) && (weight == 0)) {
00415                     weight = + 1;
00416                 }
00417                 ++currA;
00418                 ++currB;
00419             }
00420         }
00421 
00422         begSeqA = currA;
00423         begSeqB = currB;
00424     }
00425 
00426     if (currA->isNull() && currB->isNull()) {
00427         return 0;
00428     }
00429 
00430     return currA->isNull() ? -1 : + 1;
00431 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal