• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

ktranslit.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries    Copyright (C) 2007 Chusslove Illich <caslav.ilic@gmx.net>
00002 
00003     This library is free software; you can redistribute it and/or
00004     modify it under the terms of the GNU Library General Public
00005     License as published by the Free Software Foundation; either
00006     version 2 of the License, or (at your option) any later version.
00007 
00008     This library is distributed in the hope that it will be useful,
00009     but WITHOUT ANY WARRANTY; without even the implied warranty of
00010     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011     Library General Public License for more details.
00012 
00013     You should have received a copy of the GNU Library General Public License
00014     along with this library; see the file COPYING.LIB.  If not, write to
00015     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016     Boston, MA 02110-1301, USA.
00017 */
00018 
00019 #include <ktranslit_p.h>
00020 #include <kdebug.h>
00021 
00022 #include <config.h>
00023 
00024 #include <QHash>
00025 
00026 // -----------------------------------------------------------------------------
00027 // Base class.
00028 
// Private data holder for the KTranslit base class.
// It has no members at present.
class KTranslitPrivate {};
00032 
00033 KTranslit::KTranslit ()
00034 : d(NULL)
00035 {
00036 }
00037 
00038 KTranslit::~KTranslit ()
00039 {
00040     delete d;
00041 }
00042 
00043 KTranslit *KTranslit::create (const QString &lang)
00044 {
00045     if (lang == QString::fromAscii("sr")) {
00046         return new KTranslitSerbian();
00047     }
00048     else {
00049         return NULL;
00050     }
00051 }
00052 
00053 QStringList KTranslit::fallbackList (const QString &lang)
00054 {
00055     QStringList fallbacks;
00056 
00057     if (lang.startsWith(QString::fromAscii("sr@"))) {
00058         fallbacks += QString::fromAscii("sr");
00059     }
00060 
00061     return fallbacks;
00062 }
00063 
00064 void splitLangScript (const QString &lang, QString &ln, QString &scr)
00065 {
00066     ln = lang;
00067     scr.clear();
00068     int pos = lang.indexOf('@');
00069     if (pos >= 0) {
00070         ln = lang.left(pos);
00071         scr = lang.mid(pos + 1);
00072     }
00073 }
00074 
00075 QString KTranslit::higherPriorityScript (const QString &lang,
00076                                          const KLocale *locale)
00077 {
00078     if (locale == NULL) {
00079         return QString();
00080     }
00081 
00082     // Split into pure language and script part.
00083     QString ln, scr;
00084     splitLangScript(lang, ln, scr);
00085 
00086     // Search through higher priority languages.
00087     QString finalScrHi;
00088     if (lang != KLocale::defaultLanguage()) {
00089         foreach (const QString &langHi, locale->languageList()) {
00090             // Don't search lower priority languages.
00091             if (langHi == lang)
00092                 break;
00093 
00094             // Split current spec into pure language and script parts.
00095             QString lnHi, scrHi;
00096             splitLangScript(langHi, lnHi, scrHi);
00097 
00098             // Return current script if languages match.
00099             if (lnHi == ln) {
00100                 finalScrHi = scrHi;
00101                 break;
00102             }
00103         }
00104     }
00105     return finalScrHi;
00106 }
00107 
00108 QString KTranslit::transliterate (const QString &str,
00109                                   const QString &script) const
00110 {
00111     Q_UNUSED(script);
00112     return str;
00113 }
00114 
00115 QString KTranslit::resolveInserts (const QString &str_, int nins, int ind,
00116                                    const QString &head) const
00117 {
00118     int hlen = head.length();
00119 
00120     QString str = str_;
00121     QString rstr;
00122     while (1) {
00123         int p = str.indexOf(head);
00124         if (p < 0) {
00125             break;
00126         }
00127 
00128         // Append segment before optional insert to resulting text.
00129         rstr.append(str.left(p));
00130 
00131         // Must have at least 2 characters after the head.
00132         if (str.length() < p + hlen + 2) {
00133             kDebug(173) << QString("Malformed optional inserts list in {%1}, "
00134                                    "starting here: {%2}").arg(str_, str);
00135             return str_;
00136         }
00137 
00138         // Read the separating character and trim original string.
00139         QChar sep = str[p + hlen];
00140         str.remove(0, p + hlen + 1);
00141 
00142         // Parse requested number of inserts,
00143         // choose the one with matching index for resulting text.
00144         for (int i = 0; i < nins; ++i) {
00145             // Ending separator for this insert.
00146             int p = str.indexOf(sep);
00147 
00148             // Must have exactly the requested number of inserts.
00149             if (p < 0) {
00150                 kDebug(173) << QString("Not enough inserts listed in {%1}, "
00151                                        "starting here: {%2}").arg(str_, str);
00152                 return str_;
00153             }
00154 
00155             // If index is matching requested, append to resulting text.
00156             if (i == ind) {
00157                 rstr.append(str.left(p));
00158             }
00159 
00160             // Trim original string.
00161             str.remove(0, p + 1);
00162         }
00163     }
00164     // Append the final segment to resulting text.
00165     rstr.append(str);
00166 
00167     return rstr;
00168 }
00169 
00170 // If the insert is just starting at position i, return the position of the
00171 // first character after the insert (or string length if none).
00172 // If the insert is not starting, return i itself.
00173 static int skipInsert (const QString &str, int i, int ninserts,
00174                        const QString &head)
00175 {
00176     int hlen = head.length();
00177 
00178     if (str.mid(i, hlen) == head) {
00179         int slen = str.length();
00180         int ia = i + hlen;
00181         if (ia >= slen) return slen;
00182         QChar sep = str[ia];
00183         for (int k = 0; k < ninserts; ++k) {
00184             ia = str.indexOf(sep, ia + 1);
00185             if (ia < 0) return slen;
00186         }
00187         return ia + 1;
00188     }
00189     else {
00190         return i;
00191     }
00192 }
00193 
00194 // -----------------------------------------------------------------------------
00195 // Serbian.
00196 
00197 class KTranslitSerbianPrivate
00198 {
00199     public:
00200     QHash<QString, bool> latinNames;
00201     QHash<QString, bool> yekavianNames;
00202     QHash<QChar, QString> dictC2L;
00203     QHash<QString, QString> dictI2E;
00204     int maxReflexLen;
00205     QChar reflexMark;
00206 };
00207 
00208 KTranslitSerbian::KTranslitSerbian ()
00209 : d(new KTranslitSerbianPrivate())
00210 {
00211     #define SR_NAME_ENTRY(hash, name) do { \
00212         hash[QString::fromAscii(name)] = true; \
00213     } while (0)
00214     SR_NAME_ENTRY(d->latinNames, "latin");
00215     SR_NAME_ENTRY(d->latinNames, "Latn");
00216     SR_NAME_ENTRY(d->latinNames, "ijelatin");
00217     SR_NAME_ENTRY(d->latinNames, "jekavianlatin");
00218     SR_NAME_ENTRY(d->latinNames, "ijekavianlatin");
00219     SR_NAME_ENTRY(d->latinNames, "yekavianlatin");
00220     SR_NAME_ENTRY(d->latinNames, "iyekavianlatin");
00221     SR_NAME_ENTRY(d->yekavianNames, "ije");
00222     SR_NAME_ENTRY(d->yekavianNames, "ijelatin");
00223     SR_NAME_ENTRY(d->yekavianNames, "jekavian");
00224     SR_NAME_ENTRY(d->yekavianNames, "jekavianlatin");
00225     SR_NAME_ENTRY(d->yekavianNames, "ijekavian");
00226     SR_NAME_ENTRY(d->yekavianNames, "ijekavianlatin");
00227     SR_NAME_ENTRY(d->yekavianNames, "yekavian");
00228     SR_NAME_ENTRY(d->yekavianNames, "yekavianlatin");
00229     SR_NAME_ENTRY(d->yekavianNames, "iyekavian");
00230     SR_NAME_ENTRY(d->yekavianNames, "iyekavianlatin");
00231 
00232     #define SR_DICTC2L_ENTRY(a, b) do { \
00233         d->dictC2L[QString::fromUtf8(a)[0]] = QString::fromUtf8(b); \
00234     } while (0)
00235     SR_DICTC2L_ENTRY("а", "a");
00236     SR_DICTC2L_ENTRY("б", "b");
00237     SR_DICTC2L_ENTRY("в", "v");
00238     SR_DICTC2L_ENTRY("г", "g");
00239     SR_DICTC2L_ENTRY("д", "d");
00240     SR_DICTC2L_ENTRY("ђ", "đ");
00241     SR_DICTC2L_ENTRY("е", "e");
00242     SR_DICTC2L_ENTRY("ж", "ž");
00243     SR_DICTC2L_ENTRY("з", "z");
00244     SR_DICTC2L_ENTRY("и", "i");
00245     SR_DICTC2L_ENTRY("ј", "j");
00246     SR_DICTC2L_ENTRY("к", "k");
00247     SR_DICTC2L_ENTRY("л", "l");
00248     SR_DICTC2L_ENTRY("љ", "lj");
00249     SR_DICTC2L_ENTRY("м", "m");
00250     SR_DICTC2L_ENTRY("н", "n");
00251     SR_DICTC2L_ENTRY("њ", "nj");
00252     SR_DICTC2L_ENTRY("о", "o");
00253     SR_DICTC2L_ENTRY("п", "p");
00254     SR_DICTC2L_ENTRY("р", "r");
00255     SR_DICTC2L_ENTRY("с", "s");
00256     SR_DICTC2L_ENTRY("т", "t");
00257     SR_DICTC2L_ENTRY("ћ", "ć");
00258     SR_DICTC2L_ENTRY("у", "u");
00259     SR_DICTC2L_ENTRY("ф", "f");
00260     SR_DICTC2L_ENTRY("х", "h");
00261     SR_DICTC2L_ENTRY("ц", "c");
00262     SR_DICTC2L_ENTRY("ч", "č");
00263     SR_DICTC2L_ENTRY("џ", "dž");
00264     SR_DICTC2L_ENTRY("ш", "š");
00265     SR_DICTC2L_ENTRY("А", "A");
00266     SR_DICTC2L_ENTRY("Б", "B");
00267     SR_DICTC2L_ENTRY("В", "V");
00268     SR_DICTC2L_ENTRY("Г", "G");
00269     SR_DICTC2L_ENTRY("Д", "D");
00270     SR_DICTC2L_ENTRY("Ђ", "Đ");
00271     SR_DICTC2L_ENTRY("Е", "E");
00272     SR_DICTC2L_ENTRY("Ж", "Ž");
00273     SR_DICTC2L_ENTRY("З", "Z");
00274     SR_DICTC2L_ENTRY("И", "I");
00275     SR_DICTC2L_ENTRY("Ј", "J");
00276     SR_DICTC2L_ENTRY("К", "K");
00277     SR_DICTC2L_ENTRY("Л", "L");
00278     SR_DICTC2L_ENTRY("Љ", "Lj");
00279     SR_DICTC2L_ENTRY("М", "M");
00280     SR_DICTC2L_ENTRY("Н", "N");
00281     SR_DICTC2L_ENTRY("Њ", "Nj");
00282     SR_DICTC2L_ENTRY("О", "O");
00283     SR_DICTC2L_ENTRY("П", "P");
00284     SR_DICTC2L_ENTRY("Р", "R");
00285     SR_DICTC2L_ENTRY("С", "S");
00286     SR_DICTC2L_ENTRY("Т", "T");
00287     SR_DICTC2L_ENTRY("Ћ", "Ć");
00288     SR_DICTC2L_ENTRY("У", "U");
00289     SR_DICTC2L_ENTRY("Ф", "F");
00290     SR_DICTC2L_ENTRY("Х", "H");
00291     SR_DICTC2L_ENTRY("Ц", "C");
00292     SR_DICTC2L_ENTRY("Ч", "Č");
00293     SR_DICTC2L_ENTRY("Џ", "Dž");
00294     SR_DICTC2L_ENTRY("Ш", "Š");
00295 
00296     d->reflexMark = QString::fromUtf8("›")[0];
00297     #define SR_DICTI2E_ENTRY(a, b) do { \
00298         d->dictI2E[QString::fromUtf8(a)] = QString::fromUtf8(b); \
00299     } while (0)
00300     // basic
00301     SR_DICTI2E_ENTRY("ије", "е");
00302     SR_DICTI2E_ENTRY("иј", "е");
00303     SR_DICTI2E_ENTRY("је", "е");
00304     SR_DICTI2E_ENTRY("ље", "ле");
00305     SR_DICTI2E_ENTRY("ње", "не");
00306     SR_DICTI2E_ENTRY("ио", "ео");
00307     SR_DICTI2E_ENTRY("иљ", "ел");
00308     // special cases (include one prev. letter)
00309     SR_DICTI2E_ENTRY("лије", "ли");
00310     SR_DICTI2E_ENTRY("мија", "меја");
00311     SR_DICTI2E_ENTRY("мије", "мејe");
00312     SR_DICTI2E_ENTRY("није", "ни");
00313 
00314     d->maxReflexLen = 0;
00315     foreach (const QString &reflex, d->dictI2E.keys()) {
00316         if (d->maxReflexLen < reflex.length()) {
00317             d->maxReflexLen = reflex.length();
00318         }
00319     }
00320 }
00321 
00322 KTranslitSerbian::~KTranslitSerbian ()
00323 {
00324     delete d;
00325 }
00326 
00327 QString KTranslitSerbian::transliterate (const QString &str_,
00328                                          const QString &script) const
00329 {
00330     static QString insHead("~@");
00331     static QString insHeadIje("~#");
00332 
00333     QString str = str_;
00334 
00335     // Resolve Ekavian/Yekavian (must come before Cyrillic/Latin).
00336     if (d->yekavianNames.contains(script)) {
00337         // Just remove reflex marks.
00338         str.remove(d->reflexMark);
00339         str = resolveInserts(str, 2, 1, insHeadIje);
00340     } else {
00341         QString nstr;
00342         int p = 0;
00343         while (true) {
00344             int pp = p;
00345             p = str.indexOf(d->reflexMark, p);
00346             if (p < 0) {
00347                 nstr.append(str.mid(pp));
00348                 break;
00349             }
00350             nstr.append(str.mid(pp, p - pp));
00351             p += 1;
00352 
00353             // Try to resolve yat-reflex.
00354             QString reflex;
00355             QString ekvform;
00356             for (int rl = d->maxReflexLen; rl > 0; --rl) {
00357                 reflex = str.mid(p, rl);
00358                 ekvform = d->dictI2E[reflex];
00359                 if (!ekvform.isEmpty()) {
00360                     break;
00361                 }
00362             }
00363 
00364             if (!ekvform.isEmpty()) {
00365                 nstr.append(ekvform);
00366                 p += reflex.length();
00367             } else {
00368                 QString dreflex = str.mid(p - 1, d->maxReflexLen + 1);
00369                 kDebug(173) << QString("Unknown yat-reflex {%1} "
00370                                        "in {%2}").arg(dreflex, str);
00371                 nstr.append(str.mid(p - 1, 1));
00372             }
00373         }
00374         str = resolveInserts(nstr, 2, 0, insHeadIje);
00375     }
00376 
00377     // Resolve Cyrillic/Latin.
00378     if (d->latinNames.contains(script)) {
00379         // NOTE: This loop has been somewhat optimized for speed.
00380         int slen = str.length();
00381         bool anyInserts = str.indexOf(insHead) >= 0;
00382         QString nstr;
00383         nstr.reserve(slen + 5);
00384         for (int i = 0; i < slen; ++i) {
00385             // Skip alternative inserts altogether, so that they can be used
00386             // as a mean to exclude from transliteration.
00387             if (anyInserts) {
00388                 int to = skipInsert(str, i, 2, insHead);
00389                 if (to > i) {
00390                     nstr.append(str.mid(i, to - i));
00391                     if (to >= slen) break;
00392                     i = to;
00393                 }
00394             }
00395             // Transliterate current character.
00396             QChar c = str[i];
00397             QString r = d->dictC2L[c];
00398             if (!r.isEmpty()) {
00399                 if (   r.length() > 1 && c.isUpper()
00400                     && (   (i + 1 < slen && str[i + 1].isUpper())
00401                         || (i > 0 && str[i - 1].isUpper()))) {
00402                     nstr.append(r.toUpper());
00403                 } else {
00404                     nstr.append(r);
00405                 }
00406             } else {
00407                 nstr.append(c);
00408             }
00409         }
00410         str = resolveInserts(nstr, 2, 1, insHead);
00411     } else {
00412         str = resolveInserts(str, 2, 0, insHead);
00413     }
00414 
00415     return str;
00416 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal