• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • API Reference
  • Sitemap
  • Contact Us
 

NepomukDaemons

clucenetokenizer.h

Go to the documentation of this file.
00001 /*
00002  * Modified version of StandardTokenizer.h for Nepomuk mostly to optimize for filename indexing
00003  * Copyright (C) 2008 Sebastian Trueg <trueg@kde.org>
00004  *
00005  * Based on StandardTokenizer.h from the CLucene package.
00006  * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
00007  *
00008  * This library is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Library General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2 of the License, or (at your option) any later version.
00012  *
00013  * This library is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Library General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Library General Public License
00019  * along with this library; see the file COPYING.LIB.  If not, write to
00020  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021  * Boston, MA 02110-1301, USA.
00022  */
00023 
00024 #ifndef _NEPOMUK_CLUCENE_TOKENIZER_H_
00025 #define _NEPOMUK_CLUCENE_TOKENIZER_H_
00026 
00027 #include <CLucene/clucene-config.h>
00028 #include <CLucene/analysis/AnalysisHeader.h>
00029 #include <CLucene/analysis/Analyzers.h>
00030 #include <CLucene/util/StringBuffer.h>
00031 #include <CLucene/util/FastCharStream.h>
00032 #include <CLucene/util/Reader.h>
00033 
00034 #include "clucenetokenizerconstants.h"
00035 
00036 namespace Nepomuk {
00037 
00054     class CLuceneTokenizer: public CL_NS(analysis)::Tokenizer
00055     {
00056     public:
00057         CL_NS(util)::FastCharStream* rd;
00058 
00059         // Constructs a tokenizer for this Reader.
00060         CLuceneTokenizer(CL_NS(util)::Reader* reader);
00061 
00062         ~CLuceneTokenizer();
00063 
00067         bool next(CL_NS(analysis)::Token* token);
00068 
00069         // Reads for number like "1"/"1234.567", or IP address like "192.168.1.2".
00070         bool ReadNumber(const TCHAR* previousNumber, const TCHAR prev, CL_NS(analysis)::Token* t);
00071 
00072         bool ReadAlphaNum(const TCHAR prev, CL_NS(analysis)::Token* t);
00073 
00074         // Reads for apostrophe-containing word.
00075         bool ReadApostrophe(CL_NS(util)::StringBuffer* str, CL_NS(analysis)::Token* t);
00076 
00077         // Reads for something@... it may be a COMPANY name or a EMAIL address
00078         bool ReadAt(CL_NS(util)::StringBuffer* str, CL_NS(analysis)::Token* t);
00079 
00080         // Reads for COMPANY name like AT&T.
00081         bool ReadCompany(CL_NS(util)::StringBuffer* str, CL_NS(analysis)::Token* t);
00082     
00083         // Reads CJK characters
00084         bool ReadCJK(const TCHAR prev, CL_NS(analysis)::Token* t);
00085 
00086     private:
00087         int32_t rdPos;
00088         int32_t tokenStart;
00089 
00090         // Advance by one character, incrementing rdPos and returning the character.
00091         int readChar();
00092         // Retreat by one character, decrementing rdPos.
00093         void unReadChar();
00094 
00095         // createToken centralizes token creation for auditing purposes.
00096         //Token* createToken(CL_NS(util)::StringBuffer* sb, TokenTypes tokenCode);
00097         inline bool setToken(CL_NS(analysis)::Token* t, CL_NS(util)::StringBuffer* sb, TokenTypes tokenCode);
00098 
00099         bool ReadDotted(CL_NS(util)::StringBuffer* str, TokenTypes forcedType, CL_NS(analysis)::Token* t);
00100     };
00101 }
00102 
00103 #endif

NepomukDaemons

Skip menu "NepomukDaemons"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

API Reference

Skip menu "API Reference"
  • KCMShell
  • KNotify
  • KStyles
  • Nepomuk Daemons
Generated for API Reference by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal