• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

nsSBCSGroupProber.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 1998 <developer@mozilla.org>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #include "nsSBCSGroupProber.h"
00027 
00028 #include "nsSBCharSetProber.h"
00029 #include "nsHebrewProber.h"
00030 #include "UnicodeGroupProber.h"
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 
00035 namespace kencodingprober {
00036 nsSBCSGroupProber::nsSBCSGroupProber()
00037 {
00038   mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
00039   mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
00040   mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
00041   mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
00042   mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
00043   mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
00044   mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
00045   mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
00046   mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
00047   mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
00048 
00049   nsHebrewProber *hebprober = new nsHebrewProber();
00050   // Notice: Any change in these indexes - 10,11,12 must be reflected
00051   // in the code below as well.
00052   mProbers[10] = hebprober;
00053   mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew
00054   mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew
00055   mProbers[13] = new UnicodeGroupProber();
00056   
00057   // Tell the Hebrew prober about the logical and visual probers
00058   if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
00059   {
00060     hebprober->SetModelProbers(mProbers[11], mProbers[12]);
00061   }
00062   else // One or more is null. avoid any Hebrew probing, null them all
00063   {
00064     for (unsigned int i = 10; i <= 12; ++i)
00065     { 
00066       delete mProbers[i]; 
00067       mProbers[i] = 0; 
00068     }
00069   }
00070 
00071   // disable latin2 before latin1 is available, otherwise all latin1 
00072   // will be detected as latin2 because of their similarity.
00073   //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
00074   //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
00075 
00076   Reset();
00077 }
00078 
00079 nsSBCSGroupProber::~nsSBCSGroupProber()
00080 {
00081   for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++)
00082   {
00083     delete mProbers[i];
00084   }
00085 }
00086 
00087 
00088 const char* nsSBCSGroupProber::GetCharSetName()
00089 {
00090   //if we have no answer yet
00091   if (mBestGuess == -1)
00092   {
00093     GetConfidence();
00094     //no charset seems positive
00095     if (mBestGuess == -1)
00096       //we will use default.
00097       mBestGuess = 0;
00098   }
00099   return mProbers[mBestGuess]->GetCharSetName();
00100 }
00101 
00102 void  nsSBCSGroupProber::Reset(void)
00103 {
00104   mActiveNum = 0;
00105   for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++)
00106   {
00107     if (mProbers[i]) // not null
00108     {
00109       mProbers[i]->Reset();
00110       mIsActive[i] = true;
00111       ++mActiveNum;
00112     }
00113     else
00114       mIsActive[i] = false;
00115   }
00116   mBestGuess = -1;
00117   mState = eDetecting;
00118 }
00119 
00120 
00121 nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, unsigned int aLen)
00122 {
00123   nsProbingState st;
00124   unsigned int i;
00125   char *newBuf1 = 0;
00126   unsigned int newLen1 = 0;
00127 
00128   //apply filter to original buffer, and we got new buffer back
00129   //depend on what script it is, we will feed them the new buffer 
00130   //we got after applying proper filter
00131   //this is done without any consideration to KeepEnglishLetters
00132   //of each prober since as of now, there are no probers here which
00133   //recognize languages with English characters.
00134   if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
00135     goto done;
00136   
00137   if (newLen1 == 0)
00138     goto done; // Nothing to see here, move on.
00139 
00140   for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i)
00141   {
00142      if (!mIsActive[i])
00143        continue;
00144      st = mProbers[i]->HandleData(newBuf1, newLen1);
00145      if (st == eFoundIt)
00146      {
00147        mBestGuess = i;
00148        mState = eFoundIt;
00149        break;
00150      }
00151      else if (st == eNotMe)
00152      {
00153        mIsActive[i] = false;
00154        mActiveNum--;
00155        if (mActiveNum <= 0)
00156        {
00157          mState = eNotMe;
00158          break;
00159        }
00160      }
00161   }
00162 
00163 done:
00164   free(newBuf1);
00165 
00166   return mState;
00167 }
00168 
00169 float nsSBCSGroupProber::GetConfidence(void)
00170 {
00171   unsigned int i;
00172   float bestConf = 0.0, cf;
00173 
00174   switch (mState)
00175   {
00176   case eFoundIt:
00177     return (float)0.99; //sure yes
00178   case eNotMe:
00179     return (float)0.01;  //sure no
00180   default:
00181     for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i)
00182     {
00183       if (!mIsActive[i])
00184         continue;
00185       cf = mProbers[i]->GetConfidence();
00186       if (bestConf < cf)
00187       {
00188         bestConf = cf;
00189         mBestGuess = i;
00190       }
00191     }
00192   }
00193   return bestConf;
00194 }
00195 
00196 #ifdef DEBUG_PROBE
00197 void nsSBCSGroupProber::DumpStatus()
00198 {
00199   unsigned int i;
00200   float cf;
00201   
00202   cf = GetConfidence();
00203   printf(" SBCS Group Prober --------begin status \r\n");
00204   for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
00205   {
00206     if (!mIsActive[i])
00207       printf("  inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
00208     else
00209       mProbers[i]->DumpStatus();
00210   }
00211   printf(" SBCS Group found best match [%s] confidence %f.\r\n",  
00212          mProbers[mBestGuess]->GetCharSetName(), cf);
00213 }
00214 #endif
00215 }
00216 
00217 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal