• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KHTML

khtml_filter.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE project
00002 
00003    Copyright (C) 2005 Ivor Hewitt     <ivor@kde.org>
00004    Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
00005    Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Library General Public
00009    License as published by the Free Software Foundation; either
00010    version 2 of the License, or (at your option) any later version.
00011 
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Library General Public License for more details.
00016 
00017    You should have received a copy of the GNU Library General Public License
00018    along with this library; see the file COPYING.LIB.  If not, write to
00019    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020    Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "khtml_filter_p.h"
00024 #include <QDebug>
00025 
00026 // rolling hash parameters
00027 #define HASH_P (1997)
00028 #define HASH_Q (17509)
00029 // HASH_MOD = (HASH_P^7) % HASH_Q
00030 #define HASH_MOD (523)
00031 
00032 namespace khtml {
00033 
00034 void FilterSet::addFilter(const QString& filterStr)
00035 {
00036     QString filter = filterStr;
00037     
00038     if (filter.startsWith(QLatin1Char('!')))
00039         return;
00040 
00041     // Strip leading @@
00042     int first = 0;
00043     int last  = filter.length() - 1;
00044     if (filter.startsWith(QLatin1String("@@")))
00045         first = 2;
00046         
00047     // Strip options, we ignore them for now.
00048     int dollar = filter.lastIndexOf(QLatin1Char('$'));
00049     if (dollar != -1)
00050         last = dollar - 1;
00051     
00052     // Perhaps nothing left?
00053     if (first > last)
00054         return;
00055         
00056     filter = filter.mid(first, last - first + 1);
00057     
00058     // Is it a regexp filter?
00059     if (filter.length()>2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/')))
00060     {
00061         QString inside = filter.mid(1, filter.length()-2);
00062         QRegExp rx(inside);
00063         reFilters.append(rx);
00064 //         qDebug() << "R:" << inside;
00065     }
00066     else
00067     {
00068         // Nope, a wildcard one.
00069         // Note: For these, we also need to handle |.
00070         
00071         // Strip wildcards at the ends
00072         first = 0;
00073         last  = filter.length() - 1;
00074         
00075         while (first < filter.length() && filter[first] == QLatin1Char('*'))
00076             ++first;
00077             
00078         while (last >= 0 && filter[last] == QLatin1Char('*'))
00079             --last;
00080             
00081         if (first > last)
00082             filter = QLatin1String("*"); // erm... Well, they asked for it.
00083         else
00084             filter = filter.mid(first, last - first + 1);
00085             
00086         // Now, do we still have any wildcard stuff left?
00087         if (filter.contains("*") || filter.contains("?")) 
00088         {
00089 //             qDebug() << "W:" << filter;
00090             // check if we can use RK first (and then check full RE for the rest) for better performance
00091             int aPos = filter.indexOf('*');
00092             if (aPos < 0)
00093                 aPos = filter.length();
00094             int qPos = filter.indexOf('?');
00095             if (qPos < 0)
00096                 qPos = filter.length();
00097             int pos = qMin(aPos, qPos);
00098             if (pos > 7) {
00099                 QRegExp rx;
00100 
00101                 rx.setPatternSyntax(QRegExp::Wildcard);
00102                 rx.setPattern(filter.mid(pos));
00103 
00104                 stringFiltersMatcher.addWildedString(filter.mid(0, pos), rx);
00105 
00106             } else {
00107                 QRegExp rx;
00108 
00109                 rx.setPatternSyntax(QRegExp::Wildcard);
00110                 rx.setPattern(filter);
00111                 reFilters.append(rx);
00112             }
00113         }
00114         else
00115         {
00116             // Fast path
00117             stringFiltersMatcher.addString(filter);
00118         }
00119     }
00120 }
00121 
00122 bool FilterSet::isUrlMatched(const QString& url)
00123 {
00124     if (stringFiltersMatcher.isMatched(url))
00125         return true;
00126 
00127     for (int c = 0; c < reFilters.size(); ++c)
00128     {
00129         if (url.contains(reFilters[c]))
00130             return true;
00131     }
00132 
00133     return false;
00134 }
00135 
00136 void FilterSet::clear()
00137 {
00138     reFilters.clear();
00139     stringFiltersMatcher.clear();
00140 }
00141 
00142 
00143 void StringsMatcher::addString(const QString& pattern)
00144 {
00145     if (pattern.length() < 8) {
00146         // handle short string differently
00147         shortStringFilters.append(pattern);
00148     } else {
00149         // use modified Rabin-Karp's algorithm with 8-length string hash
00150         // i.e. store hash of first 8 chars in the HashMap for fast look-up
00151         stringFilters.append(pattern);
00152         int ind = stringFilters.size() - 1;
00153         int current = 0;
00154 
00155         // compute hash using rolling hash
00156         // hash for string: x0,x1,x2...xn-1 will be:
00157         // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
00158         // where p and q some wisely-chosen integers
00159         /*for (int k = 0; k < 8; ++k)*/
00160         int len = pattern.length();
00161         for (int k = len - 8; k < len; ++k)
00162             current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
00163 
00164         // insert computed hash value into HashMap
00165         WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
00166         if (it == stringFiltersHash.end()) {
00167             QVector<int> list;
00168             list.append(ind);
00169             stringFiltersHash.add(current + 1, list);
00170             fastLookUp.setBit(current);
00171         } else {
00172             it->second.append(ind);
00173         }
00174     }
00175 }
00176 
00177 void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
00178 {
00179     rePrefixes.append(prefix);
00180     reFilters.append(rx);
00181     int index = -rePrefixes.size();
00182 
00183     int current = 0;
00184     for (int k = 0; k < 8; ++k)
00185         current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
00186 
00187     // insert computed hash value into HashMap
00188     WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
00189     if (it == stringFiltersHash.end()) {
00190         QVector<int> list;
00191         list.append(index);
00192         stringFiltersHash.add(current + 1, list);
00193         fastLookUp.setBit(current);
00194     } else {
00195         it->second.append(index);
00196     }
00197 }
00198 
00199 bool StringsMatcher::isMatched(const QString& str) const
00200 {
00201     // check short strings first
00202     for (int i = 0; i < shortStringFilters.size(); ++i) {
00203         if (str.contains(shortStringFilters[i]))
00204             return true;
00205     }
00206 
00207     int len = str.length();
00208     int k;
00209 
00210     int current = 0;
00211     int next = 0;
00212     // compute hash for first 8 characters
00213     for (k = 0; k < 8 && k < len; ++k)
00214         current = (current * HASH_P + str[k].unicode()) % HASH_Q;
00215 
00216     WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
00217     // main Rabin-Karp's algorithm loop
00218     for (k = 7; k < len; ++k, current = next) {
00219         // roll the hash if not at the end
00220         // (calculate hash for the next iteration)
00221         if (k + 1 < len)
00222             next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
00223 
00224         if (!fastLookUp.testBit(current))
00225             continue;
00226 
00227         // look-up the hash in the HashMap and check all strings
00228         WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
00229 
00230         // check possible strings
00231         if (it != hashEnd) {
00232             for (int j = 0; j < it->second.size(); ++j) {
00233                 int index = it->second[j];
00234                 // check if we got simple string or REs prefix
00235                 if (index >= 0) {
00236                     int flen = stringFilters[index].length();
00237                     if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
00238                         return true;
00239                 } else {
00240                     index = -index - 1;
00241                     int flen = rePrefixes[index].length();
00242                     if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen) &&
00243                             str.indexOf(reFilters[index], k - 7 + flen) == k - 7 + flen)
00244                         return true;
00245                 }
00246             }
00247         }
00248     }
00249 
00250     return false;
00251 }
00252 
00253 void StringsMatcher::clear()
00254 {
00255     stringFilters.clear();
00256     shortStringFilters.clear();
00257     reFilters.clear();
00258     rePrefixes.clear();
00259     stringFiltersHash.clear();
00260     fastLookUp.resize(HASH_Q);
00261     fastLookUp.fill(0, 0, HASH_Q);
00262 }
00263 
00264 }
00265 
00266 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;

KHTML

Skip menu "KHTML"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal