00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <ktranslit_p.h>
00020 #include <kdebug.h>
00021
00022 #include <config.h>
00023
00024 #include <QHash>
00025
00026
00027
00028
// Private data holder for KTranslit. It declares no members at present.
class KTranslitPrivate {};
00032
00033 KTranslit::KTranslit ()
00034 : d(NULL)
00035 {
00036 }
00037
00038 KTranslit::~KTranslit ()
00039 {
00040 delete d;
00041 }
00042
00043 KTranslit *KTranslit::create (const QString &lang)
00044 {
00045 if (lang == QString::fromAscii("sr")) {
00046 return new KTranslitSerbian();
00047 }
00048 else {
00049 return NULL;
00050 }
00051 }
00052
00053 QStringList KTranslit::fallbackList (const QString &lang)
00054 {
00055 QStringList fallbacks;
00056
00057 if (lang.startsWith(QString::fromAscii("sr@"))) {
00058 fallbacks += QString::fromAscii("sr");
00059 }
00060
00061 return fallbacks;
00062 }
00063
00064 void splitLangScript (const QString &lang, QString &ln, QString &scr)
00065 {
00066 ln = lang;
00067 scr.clear();
00068 int pos = lang.indexOf('@');
00069 if (pos >= 0) {
00070 ln = lang.left(pos);
00071 scr = lang.mid(pos + 1);
00072 }
00073 }
00074
00075 QString KTranslit::higherPriorityScript (const QString &lang,
00076 const KLocale *locale)
00077 {
00078 if (locale == NULL) {
00079 return QString();
00080 }
00081
00082
00083 QString ln, scr;
00084 splitLangScript(lang, ln, scr);
00085
00086
00087 QString finalScrHi;
00088 if (lang != KLocale::defaultLanguage()) {
00089 foreach (const QString &langHi, locale->languageList()) {
00090
00091 if (langHi == lang)
00092 break;
00093
00094
00095 QString lnHi, scrHi;
00096 splitLangScript(langHi, lnHi, scrHi);
00097
00098
00099 if (lnHi == ln) {
00100 finalScrHi = scrHi;
00101 break;
00102 }
00103 }
00104 }
00105 return finalScrHi;
00106 }
00107
00108 QString KTranslit::transliterate (const QString &str,
00109 const QString &script) const
00110 {
00111 Q_UNUSED(script);
00112 return str;
00113 }
00114
00115 QString KTranslit::resolveInserts (const QString &str_, int nins, int ind,
00116 const QString &head) const
00117 {
00118 int hlen = head.length();
00119
00120 QString str = str_;
00121 QString rstr;
00122 while (1) {
00123 int p = str.indexOf(head);
00124 if (p < 0) {
00125 break;
00126 }
00127
00128
00129 rstr.append(str.left(p));
00130
00131
00132 if (str.length() < p + hlen + 2) {
00133 kDebug(173) << QString("Malformed optional inserts list in {%1}, "
00134 "starting here: {%2}").arg(str_, str);
00135 return str_;
00136 }
00137
00138
00139 QChar sep = str[p + hlen];
00140 str.remove(0, p + hlen + 1);
00141
00142
00143
00144 for (int i = 0; i < nins; ++i) {
00145
00146 int p = str.indexOf(sep);
00147
00148
00149 if (p < 0) {
00150 kDebug(173) << QString("Not enough inserts listed in {%1}, "
00151 "starting here: {%2}").arg(str_, str);
00152 return str_;
00153 }
00154
00155
00156 if (i == ind) {
00157 rstr.append(str.left(p));
00158 }
00159
00160
00161 str.remove(0, p + 1);
00162 }
00163 }
00164
00165 rstr.append(str);
00166
00167 return rstr;
00168 }
00169
00170
00171
00172
00173 static int skipInsert (const QString &str, int i, int ninserts,
00174 const QString &head)
00175 {
00176 int hlen = head.length();
00177
00178 if (str.mid(i, hlen) == head) {
00179 int slen = str.length();
00180 int ia = i + hlen;
00181 if (ia >= slen) return slen;
00182 QChar sep = str[ia];
00183 for (int k = 0; k < ninserts; ++k) {
00184 ia = str.indexOf(sep, ia + 1);
00185 if (ia < 0) return slen;
00186 }
00187 return ia + 1;
00188 }
00189 else {
00190 return i;
00191 }
00192 }
00193
00194
00195
00196
// Lookup tables used by KTranslitSerbian. They are filled in by the
// KTranslitSerbian constructor and read by KTranslitSerbian::transliterate().
class KTranslitSerbianPrivate
{
    public:
    // Script names for which transliterate() maps characters through dictC2L.
    // Used as a set: only key presence is tested.
    QHash<QString, bool> latinNames;
    // Script names for which transliterate() simply strips reflex marks
    // instead of substituting through dictI2E. Used as a set as well.
    QHash<QString, bool> yekavianNames;
    // Single Cyrillic character -> Latin replacement (one or two characters).
    QHash<QChar, QString> dictC2L;
    // Character sequence following a reflex mark -> replacement form
    // (presumably iyekavian -> ekavian, judging by the names; confirm).
    QHash<QString, QString> dictI2E;
    // Length of the longest key in dictI2E.
    int maxReflexLen;
    // Character that introduces a reflex sequence in the input text.
    QChar reflexMark;
};
00207
00208 KTranslitSerbian::KTranslitSerbian ()
00209 : d(new KTranslitSerbianPrivate())
00210 {
00211 #define SR_NAME_ENTRY(hash, name) do { \
00212 hash[QString::fromAscii(name)] = true; \
00213 } while (0)
00214 SR_NAME_ENTRY(d->latinNames, "latin");
00215 SR_NAME_ENTRY(d->latinNames, "Latn");
00216 SR_NAME_ENTRY(d->latinNames, "ijelatin");
00217 SR_NAME_ENTRY(d->latinNames, "jekavianlatin");
00218 SR_NAME_ENTRY(d->latinNames, "ijekavianlatin");
00219 SR_NAME_ENTRY(d->latinNames, "yekavianlatin");
00220 SR_NAME_ENTRY(d->latinNames, "iyekavianlatin");
00221 SR_NAME_ENTRY(d->yekavianNames, "ije");
00222 SR_NAME_ENTRY(d->yekavianNames, "ijelatin");
00223 SR_NAME_ENTRY(d->yekavianNames, "jekavian");
00224 SR_NAME_ENTRY(d->yekavianNames, "jekavianlatin");
00225 SR_NAME_ENTRY(d->yekavianNames, "ijekavian");
00226 SR_NAME_ENTRY(d->yekavianNames, "ijekavianlatin");
00227 SR_NAME_ENTRY(d->yekavianNames, "yekavian");
00228 SR_NAME_ENTRY(d->yekavianNames, "yekavianlatin");
00229 SR_NAME_ENTRY(d->yekavianNames, "iyekavian");
00230 SR_NAME_ENTRY(d->yekavianNames, "iyekavianlatin");
00231
00232 #define SR_DICTC2L_ENTRY(a, b) do { \
00233 d->dictC2L[QString::fromUtf8(a)[0]] = QString::fromUtf8(b); \
00234 } while (0)
00235 SR_DICTC2L_ENTRY("а", "a");
00236 SR_DICTC2L_ENTRY("б", "b");
00237 SR_DICTC2L_ENTRY("в", "v");
00238 SR_DICTC2L_ENTRY("г", "g");
00239 SR_DICTC2L_ENTRY("д", "d");
00240 SR_DICTC2L_ENTRY("ђ", "đ");
00241 SR_DICTC2L_ENTRY("е", "e");
00242 SR_DICTC2L_ENTRY("ж", "ž");
00243 SR_DICTC2L_ENTRY("з", "z");
00244 SR_DICTC2L_ENTRY("и", "i");
00245 SR_DICTC2L_ENTRY("ј", "j");
00246 SR_DICTC2L_ENTRY("к", "k");
00247 SR_DICTC2L_ENTRY("л", "l");
00248 SR_DICTC2L_ENTRY("љ", "lj");
00249 SR_DICTC2L_ENTRY("м", "m");
00250 SR_DICTC2L_ENTRY("н", "n");
00251 SR_DICTC2L_ENTRY("њ", "nj");
00252 SR_DICTC2L_ENTRY("о", "o");
00253 SR_DICTC2L_ENTRY("п", "p");
00254 SR_DICTC2L_ENTRY("р", "r");
00255 SR_DICTC2L_ENTRY("с", "s");
00256 SR_DICTC2L_ENTRY("т", "t");
00257 SR_DICTC2L_ENTRY("ћ", "ć");
00258 SR_DICTC2L_ENTRY("у", "u");
00259 SR_DICTC2L_ENTRY("ф", "f");
00260 SR_DICTC2L_ENTRY("х", "h");
00261 SR_DICTC2L_ENTRY("ц", "c");
00262 SR_DICTC2L_ENTRY("ч", "č");
00263 SR_DICTC2L_ENTRY("џ", "dž");
00264 SR_DICTC2L_ENTRY("ш", "š");
00265 SR_DICTC2L_ENTRY("А", "A");
00266 SR_DICTC2L_ENTRY("Б", "B");
00267 SR_DICTC2L_ENTRY("В", "V");
00268 SR_DICTC2L_ENTRY("Г", "G");
00269 SR_DICTC2L_ENTRY("Д", "D");
00270 SR_DICTC2L_ENTRY("Ђ", "Đ");
00271 SR_DICTC2L_ENTRY("Е", "E");
00272 SR_DICTC2L_ENTRY("Ж", "Ž");
00273 SR_DICTC2L_ENTRY("З", "Z");
00274 SR_DICTC2L_ENTRY("И", "I");
00275 SR_DICTC2L_ENTRY("Ј", "J");
00276 SR_DICTC2L_ENTRY("К", "K");
00277 SR_DICTC2L_ENTRY("Л", "L");
00278 SR_DICTC2L_ENTRY("Љ", "Lj");
00279 SR_DICTC2L_ENTRY("М", "M");
00280 SR_DICTC2L_ENTRY("Н", "N");
00281 SR_DICTC2L_ENTRY("Њ", "Nj");
00282 SR_DICTC2L_ENTRY("О", "O");
00283 SR_DICTC2L_ENTRY("П", "P");
00284 SR_DICTC2L_ENTRY("Р", "R");
00285 SR_DICTC2L_ENTRY("С", "S");
00286 SR_DICTC2L_ENTRY("Т", "T");
00287 SR_DICTC2L_ENTRY("Ћ", "Ć");
00288 SR_DICTC2L_ENTRY("У", "U");
00289 SR_DICTC2L_ENTRY("Ф", "F");
00290 SR_DICTC2L_ENTRY("Х", "H");
00291 SR_DICTC2L_ENTRY("Ц", "C");
00292 SR_DICTC2L_ENTRY("Ч", "Č");
00293 SR_DICTC2L_ENTRY("Џ", "Dž");
00294 SR_DICTC2L_ENTRY("Ш", "Š");
00295
00296 d->reflexMark = QString::fromUtf8("›")[0];
00297 #define SR_DICTI2E_ENTRY(a, b) do { \
00298 d->dictI2E[QString::fromUtf8(a)] = QString::fromUtf8(b); \
00299 } while (0)
00300
00301 SR_DICTI2E_ENTRY("ије", "е");
00302 SR_DICTI2E_ENTRY("иј", "е");
00303 SR_DICTI2E_ENTRY("је", "е");
00304 SR_DICTI2E_ENTRY("ље", "ле");
00305 SR_DICTI2E_ENTRY("ње", "не");
00306 SR_DICTI2E_ENTRY("ио", "ео");
00307 SR_DICTI2E_ENTRY("иљ", "ел");
00308
00309 SR_DICTI2E_ENTRY("лије", "ли");
00310 SR_DICTI2E_ENTRY("мија", "меја");
00311 SR_DICTI2E_ENTRY("мије", "мејe");
00312 SR_DICTI2E_ENTRY("није", "ни");
00313
00314 d->maxReflexLen = 0;
00315 foreach (const QString &reflex, d->dictI2E.keys()) {
00316 if (d->maxReflexLen < reflex.length()) {
00317 d->maxReflexLen = reflex.length();
00318 }
00319 }
00320 }
00321
00322 KTranslitSerbian::~KTranslitSerbian ()
00323 {
00324 delete d;
00325 }
00326
00327 QString KTranslitSerbian::transliterate (const QString &str_,
00328 const QString &script) const
00329 {
00330 static QString insHead("~@");
00331 static QString insHeadIje("~#");
00332
00333 QString str = str_;
00334
00335
00336 if (d->yekavianNames.contains(script)) {
00337
00338 str.remove(d->reflexMark);
00339 str = resolveInserts(str, 2, 1, insHeadIje);
00340 } else {
00341 QString nstr;
00342 int p = 0;
00343 while (true) {
00344 int pp = p;
00345 p = str.indexOf(d->reflexMark, p);
00346 if (p < 0) {
00347 nstr.append(str.mid(pp));
00348 break;
00349 }
00350 nstr.append(str.mid(pp, p - pp));
00351 p += 1;
00352
00353
00354 QString reflex;
00355 QString ekvform;
00356 for (int rl = d->maxReflexLen; rl > 0; --rl) {
00357 reflex = str.mid(p, rl);
00358 ekvform = d->dictI2E[reflex];
00359 if (!ekvform.isEmpty()) {
00360 break;
00361 }
00362 }
00363
00364 if (!ekvform.isEmpty()) {
00365 nstr.append(ekvform);
00366 p += reflex.length();
00367 } else {
00368 QString dreflex = str.mid(p - 1, d->maxReflexLen + 1);
00369 kDebug(173) << QString("Unknown yat-reflex {%1} "
00370 "in {%2}").arg(dreflex, str);
00371 nstr.append(str.mid(p - 1, 1));
00372 }
00373 }
00374 str = resolveInserts(nstr, 2, 0, insHeadIje);
00375 }
00376
00377
00378 if (d->latinNames.contains(script)) {
00379
00380 int slen = str.length();
00381 bool anyInserts = str.indexOf(insHead) >= 0;
00382 QString nstr;
00383 nstr.reserve(slen + 5);
00384 for (int i = 0; i < slen; ++i) {
00385
00386
00387 if (anyInserts) {
00388 int to = skipInsert(str, i, 2, insHead);
00389 if (to > i) {
00390 nstr.append(str.mid(i, to - i));
00391 if (to >= slen) break;
00392 i = to;
00393 }
00394 }
00395
00396 QChar c = str[i];
00397 QString r = d->dictC2L[c];
00398 if (!r.isEmpty()) {
00399 if ( r.length() > 1 && c.isUpper()
00400 && ( (i + 1 < slen && str[i + 1].isUpper())
00401 || (i > 0 && str[i - 1].isUpper()))) {
00402 nstr.append(r.toUpper());
00403 } else {
00404 nstr.append(r);
00405 }
00406 } else {
00407 nstr.append(c);
00408 }
00409 }
00410 str = resolveInserts(nstr, 2, 1, insHead);
00411 } else {
00412 str = resolveInserts(str, 2, 0, insHead);
00413 }
00414
00415 return str;
00416 }