• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

UnicodeGroupProber.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 2008 <wkai@gmail.com>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #include "UnicodeGroupProber.h"
00027 
00028 #include "ctype_test_p.h"
00029 
00030 #include <QtAlgorithms>
00031 #include <math.h>
00032 
00033 namespace kencodingprober {
00034 UnicodeGroupProber::UnicodeGroupProber(void)
00035 {
00036   mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel);
00037   mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel);
00038   mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel);
00039   mActiveSM = NUM_OF_UNICODE_CHARSETS;
00040   mState = eDetecting;
00041   mDetectedCharset = "UTF-8";
00042 }
00043 
00044 UnicodeGroupProber::~UnicodeGroupProber(void)
00045 {
00046   for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
00047     delete mCodingSM[i];
00048 }
00049 
00050 void UnicodeGroupProber::Reset(void)
00051 {
00052   mState = eDetecting;
00053   for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
00054     mCodingSM[i]->Reset();
00055   mActiveSM = NUM_OF_UNICODE_CHARSETS;
00056   mDetectedCharset = "UTF-8";
00057 }
00058 
00059 nsProbingState UnicodeGroupProber::HandleData(const char* aBuf, unsigned int aLen)
00060 {
00061   nsSMState codingState;
00062   int j;
00063   uint i, weight_BOM, counts[5];
00064   static bool disableUTF16LE = false;
00065   static bool disableUTF16BE = false;
00066   double weight_zero;
00067   
00068   if (mActiveSM <= 0) {
00069       mState = eNotMe;
00070       return mState;
00071   }
00072   
00073   if (! (disableUTF16LE || disableUTF16BE)) {
00074     if (aLen%2 != 0) {
00075             disableUTF16LE = true;
00076             disableUTF16BE = true;
00077     }      
00078     weight_BOM = (uint)(sqrt((double)aLen) + aLen/10.0);
00079     for (uint i = 0; i < 5; i++) 
00080         qCount(aBuf, aBuf+aLen, i, counts[i]);
00081     weight_zero = (2.0*(counts[0] + counts[1] + counts[2] + counts[3] + counts[4]) + weight_BOM)/aLen;
00082     if (weight_zero < log(1.4142)) {
00083         disableUTF16LE = true;
00084         disableUTF16BE = true;
00085     }
00086     if (4 >= aBuf[1] && aBuf[1] >= 0 && isprint(aBuf[0]))
00087         disableUTF16BE = true;
00088     else 
00089         disableUTF16LE = true;
00090     if (disableUTF16BE)
00091       mActiveSM--;
00092     if (disableUTF16LE) {
00093       nsCodingStateMachine* t;
00094       t = mCodingSM[1];
00095       mCodingSM[1] = mCodingSM[2];
00096       mCodingSM[2] = t;
00097       mActiveSM--;
00098     }
00099   }
00100   
00101   for (i = 0; i < aLen; ++i) {
00102     for (j = mActiveSM-1; j>= 0; --j)
00103     {
00104       //byte is feed to all active state machine 
00105       codingState = mCodingSM[j]->NextState(aBuf[i]);
00106       if (codingState == eError)
00107       {
00108         //got negative answer for this state machine, make it inactive
00109         mActiveSM--;
00110         if (mActiveSM == 0)
00111         {
00112           mState = eNotMe;
00113           return mState;
00114         }
00115         else if (j != (int)mActiveSM)
00116         {
00117           nsCodingStateMachine* t;
00118           t = mCodingSM[mActiveSM];
00119           mCodingSM[mActiveSM] = mCodingSM[j];
00120           mCodingSM[j] = t;
00121         }
00122       }
00123       else if (codingState == eItsMe)
00124       {
00125         mState = eFoundIt;
00126         mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
00127         return mState;
00128       } else if (mState == eDetecting)
00129           mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();;
00130     }
00131   }
00132   return mState;
00133 }
00134 
00135 float UnicodeGroupProber::GetConfidence()
00136 {
00137   if (mState == eFoundIt)
00138     return 0.99f;
00139   else
00140     return 0.0f;
00141 }
00142 
#ifdef DEBUG_PROBE
/**
 * Debug helper: writes the current state and name of every still-active
 * coding state machine to the kDebug(180) stream.
 */
void UnicodeGroupProber::DumpStatus()
{
    // The confidence value itself is not used here; the call is kept as in
    // the original implementation.
    GetConfidence();

    uint idx = 0;
    while (idx < mActiveSM)
    {
        nsCodingStateMachine* machine = mCodingSM[idx];
        kDebug(180) << "Unicode group" << machine->DumpCurrentState() << machine->GetCodingStateMachine() ;
        ++idx;
    }
}
#endif
00153 
00154 }
00155 
00156 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal