• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

nsSBCharSetProber.h

Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 1998 <developer@mozilla.org>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #ifndef NSSBCHARSETPROBER_H
00027 #define NSSBCHARSETPROBER_H
00028 
00029 #include "nsCharSetProber.h"
00030 
00031 #define SAMPLE_SIZE 64   // side length of SequenceModel::precedenceMatrix ([SAMPLE_SIZE][SAMPLE_SIZE])
00032 #define SB_ENOUGH_REL_THRESHOLD  1024   // NOTE(review): consumed outside this header; presumably a sequence-count threshold -- confirm in the .cpp
00033 #define POSITIVE_SHORTCUT_THRESHOLD  (float)0.95   // NOTE(review): presumably the confidence above which probing can stop early with a positive answer -- confirm
00034 #define NEGATIVE_SHORTCUT_THRESHOLD  (float)0.05   // NOTE(review): presumably the confidence below which probing can stop early with a negative answer -- confirm
00035 #define SYMBOL_CAT_ORDER  250   // NOTE(review): presumably the lowest "order" value that denotes a symbol rather than a letter -- confirm
00036 #define NUMBER_OF_SEQ_CAT 4   // number of sequence categories; sizes nsSingleByteCharSetProber::mSeqCounters
00037 #define POSITIVE_CAT   (NUMBER_OF_SEQ_CAT-1)   // index of the last sequence category
00038 #define NEGATIVE_CAT   0   // index of the first sequence category
00039 
00040 namespace kencodingprober {
00041 typedef struct
00042 {
        // Language/charset model handed to nsSingleByteCharSetProber, which keeps
        // a pointer to it in its mModel member.
00043   const unsigned char *charToOrderMap;    // [256] table used to find a char's order
00044   const char *precedenceMatrix;           // [SAMPLE_SIZE][SAMPLE_SIZE]; table used to find a 2-char sequence's frequency
00045   float  mTypicalPositiveRatio;     // = freqSeqs / totalSeqs 
00046   bool keepEnglishLetter;         // says whether this script contains English characters (not implemented)
00047   const char* charsetName;        // charset name string; presumably what GetCharSetName() reports -- TODO confirm in the .cpp
00048 } SequenceModel;
00049 
00050 
00051 class KDE_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber{
        // Charset prober driven by a single SequenceModel. Only the trivial
        // accessors are defined inline; GetCharSetName, HandleData, Reset and
        // GetConfidence are declared here and defined outside this header.
00052 public:
        // Builds a prober for `model` with no pair reversal and no auxiliary
        // name prober. Reset() is called from the constructor body; during
        // construction a virtual call resolves to this class's own Reset(),
        // never to an override in a more-derived class.
00053   nsSingleByteCharSetProber(SequenceModel *model) 
00054     :mModel(model), mReversed(false), mNameProber(0) { Reset(); }
        // Same as above, but lets the caller choose whether pairs are reversed
        // in the model lookup and supply an auxiliary name prober.
00055   nsSingleByteCharSetProber(SequenceModel *model, bool reversed, nsCharSetProber* nameProber)
00056     :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
00057 
00058   virtual const char* GetCharSetName();
00059   virtual nsProbingState HandleData(const char* aBuf, unsigned int aLen);
        // Returns the current value of mState.
00060   virtual nsProbingState GetState(void) {return mState;};
00061   virtual void      Reset(void);
00062   virtual float     GetConfidence(void);
        // Intentionally empty. NOTE(review): the name looks like a typo of
        // "SetOption"; it presumably has to match a virtual declared in
        // nsCharSetProber, so check the base class before renaming.
00063   virtual void      SetOpion() {};
00064   
00065   // This feature is not implemented yet. Every current language model
00066   // sets this parameter to false. No one is looking at this
00067   // parameter or calling this method.
00068   // Moreover, the nsSBCSGroupProber which calls the HandleData of this
00069   // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
00070   // of the English letters.
00071   bool KeepEnglishLetters() {return mModel->keepEnglishLetter;}; // (not implemented)
00072 
00073 #ifdef DEBUG_PROBE
00074   virtual void  DumpStatus();
00075 #endif
00076 
00077 protected:
        // Current probing state, as returned by GetState().
00078   nsProbingState mState;
        // Model supplied at construction; never modified through this pointer.
00079   const SequenceModel *mModel;
00080   const bool mReversed; // true if we need to reverse every pair in the model lookup
00081 
00082   // char order of the last character
00083   unsigned char mLastOrder;
00084 
        // NOTE(review): presumably the total number of 2-char sequences examined -- confirm in the .cpp
00085   unsigned int mTotalSeqs;
        // One counter per sequence category (NUMBER_OF_SEQ_CAT entries).
00086   unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT];
00087 
        // NOTE(review): presumably the total number of characters examined -- confirm in the .cpp
00088   unsigned int mTotalChar;
00089   // characters that fall in our sampling range
00090   unsigned int mFreqChar;
00091   
00092   // Optional auxiliary prober for name decision. Created and destroyed by the GroupProber, not by this class.
00093   nsCharSetProber* mNameProber; 
00094 
00095 };
00096 
00097 
00098 extern SequenceModel Koi8rModel;   // this and the following models are only declared here; their definitions live in other source files
00099 extern SequenceModel Win1251Model;
00100 extern SequenceModel Latin5Model;
00101 extern SequenceModel MacCyrillicModel;
00102 extern SequenceModel Ibm866Model;
00103 extern SequenceModel Ibm855Model;
00104 extern SequenceModel Latin7Model;
00105 extern SequenceModel Win1253Model;
00106 extern SequenceModel Latin5BulgarianModel;
00107 extern SequenceModel Win1251BulgarianModel;
00108 extern SequenceModel Latin2HungarianModel;
00109 extern SequenceModel Win1250HungarianModel;
00110 extern SequenceModel Win1255Model;
00111 }
00112 #endif /* NSSBCHARSETPROBER_H */
00113 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal