• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

ChineseGroupProber.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 1998 <developer@mozilla.org>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #include "ChineseGroupProber.h"
00027 
00028 #include "UnicodeGroupProber.h"
00029 #include "nsGB2312Prober.h"
00030 #include "nsBig5Prober.h"
00031 
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 
00035 namespace kencodingprober {
00036 #ifdef DEBUG_PROBE
// Human-readable labels for the sub-probers, used only by the DEBUG_PROBE
// diagnostics. Entry i labels mProbers[i], so the order here must match the
// order in which the constructor fills mProbers.
// The table is fully const: string literals are immutable, and binding them
// to a non-const char* is not valid in C++11 and later.
static const char* const ProberName[] =
{
  "Unicode",
  "GB18030",
  "Big5",
};
00043 
00044 #endif
00045 
00046 ChineseGroupProber::ChineseGroupProber()
00047 {
00048   mProbers[0] = new UnicodeGroupProber();
00049   mProbers[1] = new nsGB18030Prober();
00050   mProbers[2] = new nsBig5Prober();
00051   Reset();
00052 }
00053 
00054 ChineseGroupProber::~ChineseGroupProber()
00055 {
00056   for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++)
00057   {
00058     delete mProbers[i];
00059   }
00060 }
00061 
00062 const char* ChineseGroupProber::GetCharSetName()
00063 {
00064   if (mBestGuess == -1)
00065   {
00066     GetConfidence();
00067     if (mBestGuess == -1)
00068       mBestGuess = 1;       // assume it's GB18030
00069   }
00070   return mProbers[mBestGuess]->GetCharSetName();
00071 }
00072 
00073 void  ChineseGroupProber::Reset(void)
00074 {
00075   mActiveNum = 0;
00076   for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++)
00077   {
00078     if (mProbers[i])
00079     {
00080       mProbers[i]->Reset();
00081       mIsActive[i] = true;
00082       ++mActiveNum;
00083     }
00084     else
00085       mIsActive[i] = false;
00086   }
00087   mBestGuess = -1;
00088   mState = eDetecting;
00089 }
00090 
00091 nsProbingState ChineseGroupProber::HandleData(const char* aBuf, unsigned int aLen)
00092 {
00093   nsProbingState st;
00094   unsigned int i;
00095 
00096   //do filtering to reduce load to probers
00097   char *highbyteBuf;
00098   char *hptr;
00099   bool keepNext = true;   //assume previous is not ascii, it will do no harm except add some noise
00100   hptr = highbyteBuf = (char*)malloc(aLen);
00101   if (!hptr)
00102       return mState;
00103   for (i = 0; i < aLen; ++i)
00104   {
00105     if (aBuf[i] & 0x80)
00106     {
00107       *hptr++ = aBuf[i];
00108       keepNext = true;
00109     }
00110     else
00111     {
00112       //if previous is highbyte, keep this even it is a ASCII
00113       if (keepNext)
00114       {
00115           *hptr++ = aBuf[i];
00116           keepNext = false;
00117       }
00118     }
00119   }
00120 
00121   for (i = 0; i < CN_NUM_OF_PROBERS; ++i)
00122   {
00123      if (!mIsActive[i])
00124        continue;
00125      st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
00126      if (st == eFoundIt)
00127      {
00128        mBestGuess = i;
00129        mState = eFoundIt;
00130        break;
00131      }
00132      else if (st == eNotMe)
00133      {
00134        mIsActive[i] = false;
00135        mActiveNum--;
00136        if (mActiveNum <= 0)
00137        {
00138          mState = eNotMe;
00139          break;
00140        }
00141      }
00142   }
00143 
00144   free(highbyteBuf);
00145 
00146   return mState;
00147 }
00148 
00149 float ChineseGroupProber::GetConfidence(void)
00150 {
00151   unsigned int i;
00152   float bestConf = 0.0, cf;
00153 
00154   switch (mState)
00155   {
00156   case eFoundIt:
00157     return (float)0.99;
00158   case eNotMe:
00159     return (float)0.01;
00160   default:
00161     for (i = 0; i < CN_NUM_OF_PROBERS; ++i)
00162     {
00163       if (!mIsActive[i])
00164         continue;
00165       cf = mProbers[i]->GetConfidence();
00166       if (bestConf < cf)
00167       {
00168         bestConf = cf;
00169         mBestGuess = i;
00170       }
00171     }
00172   }
00173   return bestConf;
00174 }
00175 
00176 #ifdef DEBUG_PROBE
00177 void ChineseGroupProber::DumpStatus()
00178 {
00179   unsigned int i;
00180   float cf;
00181   
00182   GetConfidence();
00183   for (i = 0; i < CN_NUM_OF_PROBERS; i++)
00184   {
00185     if (!mIsActive[i])
00186       printf("  Chinese group inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
00187     else
00188     {
00189       cf = mProbers[i]->GetConfidence();
00190       printf("  Chinese group %1.3f: [%s]\r\n", cf, ProberName[i]);
00191     }
00192   }
00193 }
00194 #endif
00195 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal