rbbi.h

00001 /*
00002 ***************************************************************************
00003 *   Copyright (C) 1999-2004 International Business Machines Corporation   *
00004 *   and others. All rights reserved.                                      *
00005 ***************************************************************************
00006 
00007 **********************************************************************
00008 *   Date        Name        Description
00009 *   10/22/99    alan        Creation.
00010 *   11/11/99    rgillam     Complete port from Java.
00011 **********************************************************************
00012 */
00013 
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016 
00017 #include "unicode/utypes.h"
00018 
00019 #if !UCONFIG_NO_BREAK_ITERATION
00020 
00021 #include "unicode/brkiter.h"
00022 #include "unicode/udata.h"
00023 #include "unicode/parseerr.h"
00024 
00025 struct UTrie;  // forward declaration at global scope (before U_NAMESPACE_BEGIN); not used by any declaration in this header
00026 
00027 U_NAMESPACE_BEGIN
00028 
00030 struct RBBIDataHeader;                // parameter type of the protected RuleBasedBreakIterator constructor
00031 class  RuleBasedBreakIteratorTables;  // not used by any declaration in this header
00032 class  BreakIterator;                 // base class of RuleBasedBreakIterator
00033 class  RBBIDataWrapper;               // pointee type of the fData member
00034 struct RBBIStateTable;                // parameter type of the private handleNext/handlePrevious overloads
00035 
00036 
00037 
00052 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
          /*
           * A BreakIterator subclass; judging by its name and constructors, its
           * boundaries are determined by a set of break rules, supplied either
           * as rule source text or as binary rule data.
           *
           * NOTE(review): the comments added in this class are derived only from
           * the declarations in this header.  Behavioural details marked
           * "presumably" or "confirm" must be checked against the implementation
           * and the BreakIterator contract in unicode/brkiter.h.
           */
00053 
00054 protected:
          /* The text being analyzed.  Presumably the iterator installed through
           * adoptText() / setText() -- confirm ownership in the implementation. */
00059     CharacterIterator*  fText;
00060 
          /* Wrapper object for the break-rule data used by this iterator.
           * Ownership/sharing semantics are not visible in this header. */
00065     RBBIDataWrapper    *fData;
00066 
          /* Index related to the rule status of the most recent boundary.
           * Presumably an index into a status table held by fData -- confirm. */
00070     int32_t             fLastRuleStatusIndex;
00071 
          /* Flag telling whether fLastRuleStatusIndex currently holds a usable
           * value (see makeRuleStatusValid()) -- confirm exact semantics. */
00078     UBool               fLastStatusIndexValid;
00079 
          /* Count of dictionary characters; presumably those encountered while
           * iterating (see isDictionaryChar()) -- confirm where it is updated. */
00087     uint32_t           fDictionaryCharCount;
00088 
          /* Class-wide (static) flag.  Presumably enables debug tracing of the
           * break engine -- confirm. */
00093     static UBool        fTrace;
00094 
00095 
00096 protected:
00097     //=======================================================================
00098     // constructors
00099     //=======================================================================
00100 
          /*
           * Construct an iterator from a break-data header.  Protected: intended
           * for this class, subclasses, and the friends declared below.
           *
           * @param data    Pointer to the rule data header.  Ownership is not
           *                visible from this declaration -- confirm.
           * @param status  ICU error code, passed by reference.
           */
00111     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00112 
          /* Friends that may use the protected constructor and members. */
00113     friend class RBBIRuleBuilder; 
00114     friend class BreakIterator;
00115 
00116 
00117 
00118 public:
00119 
          /* Default constructor. */
00124     RuleBasedBreakIterator();
00125 
          /*
           * Copy constructor.
           * @param that  The iterator to copy from.
           */
00132     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00133 
          /*
           * Construct an iterator from break rules given as text.
           *
           * @param rules       The rule source text.
           * @param parseError  Presumably receives details (e.g. location) of a
           *                    rule syntax error -- confirm.
           * @param status      ICU error code, passed by reference.
           */
00142     RuleBasedBreakIterator( const UnicodeString    &rules,
00143                              UParseError           &parseError,
00144                              UErrorCode            &status);
00145 
00146 
          /*
           * Construct an iterator from a UDataMemory image.
           *
           * @param image   Data memory handle; presumably precompiled break rule
           *                data.  NOTE(review): whether the iterator takes
           *                ownership of the image is not visible here -- confirm
           *                before closing it in calling code.
           * @param status  ICU error code, passed by reference.
           */
00159     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00160 
          /* Virtual destructor. */
00165     virtual ~RuleBasedBreakIterator();
00166 
          /*
           * Assignment operator.
           * @param that  The iterator to assign from.
           * @return      Reference to a RuleBasedBreakIterator (presumably *this).
           */
00174     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00175 
          /*
           * Equality comparison against any BreakIterator.
           * @param that  The iterator to compare with.
           * @return      UBool result of the comparison; the exact equality
           *              criteria are defined by the implementation.
           */
00184     virtual UBool operator==(const BreakIterator& that) const;
00185 
          /*
           * Inequality comparison.  Defined inline at the end of this header as
           * the logical negation of operator==.
           * @param that  The iterator to compare with.
           */
00193     UBool operator!=(const BreakIterator& that) const;
00194 
          /*
           * Create a copy of this iterator.
           * @return  Pointer to a BreakIterator; presumably heap-allocated and
           *          owned by the caller -- confirm.
           */
00205     virtual BreakIterator* clone() const;
00206 
          /* @return  A hash code for this iterator. */
00212     virtual int32_t hashCode(void) const;
00213 
          /* @return  Reference to a UnicodeString holding the rules of this
           *          iterator (presumably the rule source text -- confirm). */
00219     virtual const UnicodeString& getRules(void) const;
00220 
00221     //=======================================================================
00222     // BreakIterator overrides
00223     //=======================================================================
00224 
          /* @return  Const reference to the CharacterIterator over the text
           *          being analyzed. */
00233     virtual const CharacterIterator& getText(void) const;
00234 
00235 
          /*
           * Set the text to analyze from a CharacterIterator.
           * @param newText  The new text iterator.  The "adopt" name suggests
           *                 that ownership passes to this object -- confirm.
           */
00243     virtual void adoptText(CharacterIterator* newText);
00244 
          /*
           * Set the text to analyze from a string.
           * @param newText  The new text.
           */
00251     virtual void setText(const UnicodeString& newText);
00252 
          /* Move to the first boundary.  @return  An int32_t text offset
           * (presumably the offset of that boundary -- confirm). */
00259     virtual int32_t first(void);
00260 
          /* Move to the last boundary.  @return  An int32_t text offset
           * (presumably the offset of that boundary -- confirm). */
00267     virtual int32_t last(void);
00268 
          /*
           * Move by n boundaries.
           * @param n  Number of boundaries to move; presumably negative values
           *           move backwards -- confirm.
           * @return   An int32_t text offset, or presumably a "done" sentinel
           *           defined by BreakIterator when out of range -- confirm.
           */
00279     virtual int32_t next(int32_t n);
00280 
          /* Move to the next boundary.  @return  An int32_t text offset or
           * presumably a "done" sentinel -- confirm. */
00286     virtual int32_t next(void);
00287 
          /* Move to the previous boundary.  @return  An int32_t text offset or
           * presumably a "done" sentinel -- confirm. */
00293     virtual int32_t previous(void);
00294 
          /*
           * Find a boundary relative to the given offset; by its name, the
           * first boundary after it -- confirm.
           * @param offset  Text offset to start from.
           * @return        An int32_t text offset or presumably a "done"
           *                sentinel -- confirm.
           */
00302     virtual int32_t following(int32_t offset);
00303 
          /*
           * Find a boundary relative to the given offset; by its name, the
           * last boundary before it -- confirm.
           * @param offset  Text offset to start from.
           * @return        An int32_t text offset or presumably a "done"
           *                sentinel -- confirm.
           */
00311     virtual int32_t preceding(int32_t offset);
00312 
          /*
           * Test whether the given offset is a boundary.
           * @param offset  Text offset to test.
           * @return        UBool result.  Not const, so it may move the
           *                iterator position -- confirm.
           */
00321     virtual UBool isBoundary(int32_t offset);
00322 
          /* @return  The current iteration position as an int32_t text offset. */
00328     virtual int32_t current(void) const;
00329 
00330 
          /* @return  An int32_t status value; presumably associated with the
           *          rule that determined the most recent boundary -- confirm. */
00363     virtual int32_t getRuleStatus() const;
00364 
          /*
           * Retrieve rule status values into a caller-supplied array.
           *
           * @param fillInVec  Caller-supplied array to receive the values.
           * @param capacity   Number of int32_t elements available in fillInVec.
           * @param status     ICU error code, passed by reference; presumably
           *                   reports an undersized buffer -- confirm.
           * @return           An int32_t; presumably the number of status values
           *                   available -- confirm.
           */
00388     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00389 
          /* @return  The class ID of this object's dynamic type (ICU's
           *          UClassID-based run-time type identification). */
00401     virtual UClassID getDynamicClassID(void) const;
00402 
          /* @return  The class ID for this class; static counterpart of
           *          getDynamicClassID(). */
00414     static UClassID U_EXPORT2 getStaticClassID(void);
00415 
00416     /*
00417      * Create a clone (copy) of this break iterator in memory provided
00418      *  by the caller.  The idea is to increase performance by avoiding
00419      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
00420      *  Performance gains are minimal, and correct buffer management is
00421      *  tricky.  Use clone() instead.
00422      *
00423      * @param stackBuffer  The pointer to the memory into which the cloned object
00424      *                     should be placed.  If NULL,  allocate heap memory
00425      *                     for the cloned object.
00426      * @param BufferSize   The size of the buffer.  If zero, return the required
00427      *                     buffer size, but do not clone the object.  If the
00428      *                     size was too small (but not zero), allocate heap
00429      *                     storage for the cloned object.
00430      *
00431      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
00432      *                     returned if the provided buffer was too small, and
00433      *                     the clone was therefore put on the heap.
00434      *
00435      * @return  Pointer to the clone object.  This may differ from the stackBuffer
00436      *          address if the byte alignment of the stack buffer was not suitable
00437      *          or if the stackBuffer was too small to hold the clone.
00438      * @stable ICU 2.0
00439      */
00440     virtual BreakIterator *  createBufferClone(void *stackBuffer,
00441                                                int32_t &BufferSize,
00442                                                UErrorCode &status);
00443 
00444 
          /*
           * Access the rules in binary form.
           *
           * @param length  Output, passed by reference; presumably set to the
           *                size in bytes of the returned data -- confirm.
           * @return        Pointer to the binary rule bytes.  Lifetime and
           *                ownership are not visible here -- confirm.
           */
00462     virtual const uint8_t *getBinaryRules(uint32_t &length);
00463 
00464 
00465 protected:
00466     //=======================================================================
00467     // implementation
00468     //=======================================================================
          /* Virtual hook for advancing to the next boundary; presumably called
           * by the public iteration functions.  @return  An int32_t offset. */
00477     virtual int32_t handleNext(void);
00478 
          /* Virtual hook for moving to the previous boundary; presumably called
           * by the public iteration functions.  @return  An int32_t offset. */
00487     virtual int32_t handlePrevious(void);
00488 
          /* Virtual hook to reset iterator state; what exactly is reset is
           * defined by the implementation. */
00495     virtual void reset(void);
00496 
          /*
           * Test whether a code point is a "dictionary character".
           * @param  (unnamed)  The UChar32 code point to test.
           * @return UBool result.
           */
00505     virtual UBool isDictionaryChar(UChar32);
00506 
          /* Non-virtual initialization helper; presumably shared by the
           * constructors -- confirm. */
00512     void init();
00513 
00514 private:
00515 
          /*
           * Overload of handlePrevious() that takes the state table to use.
           * @param statetable  The state table driving the backwards scan.
           * @return            An int32_t offset.
           */
00525     int32_t handlePrevious(const RBBIStateTable *statetable);
00526 
          /*
           * Overload of handleNext() that takes the state table to use.
           * @param statetable  The state table driving the forwards scan.
           * @return            An int32_t offset.
           */
00536     int32_t handleNext(const RBBIStateTable *statetable);
00537 
          /* Presumably brings fLastRuleStatusIndex up to date when
           * fLastStatusIndexValid is false -- confirm. */
00541     void makeRuleStatusValid();
00542 
00543 };
00544 
00545 //------------------------------------------------------------------------------
00546 //
00547 //   Inline Functions Definitions ...
00548 //
00549 //------------------------------------------------------------------------------
00550 
00551 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
          // Inequality is the logical negation of the (virtual) member operator==.
00552     return !this->operator==(that);
00553 }
00554 
00555 U_NAMESPACE_END
00556 
00557 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00558 
00559 #endif

Generated on Tue Nov 16 10:03:05 2004 for ICU 3.2 by  doxygen 1.3.9.1