• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KIOSlave

parsinghelpers.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002     Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017     Boston, MA 02110-1301, USA.
00018 */
00019 
00020 
// Move *pos forward past a run of blanks (space or horizontal tab).
// Scanning never goes beyond 'end'; *pos is left on the first non-blank
// character, or on 'end' if only blanks remain.
static void skipSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    for (; cursor < end; ++cursor) {
        const char c = input[cursor];
        if (c != ' ' && c != '\t') {
            break;
        }
    }
    *pos = cursor;
}
00031 
// Move *pos forward past everything that is not a space, tab, CR or LF.
// Scanning never goes beyond 'end'; *pos is left on the first such
// separator character, or on 'end' if none is found.
static void skipNonSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    while (cursor < end) {
        const char c = input[cursor];
        const bool isSeparator = (c == ' ' || c == '\t' || c == '\r' || c == '\n');
        if (isSeparator) {
            break;
        }
        ++cursor;
    }
    *pos = cursor;
}
00043 
// Move *pos to the beginning of the following line, tolerating any mix of
// CR and LF as line terminator.
// Returns true if another header line follows. Returns false when the end of
// the header was reached, i.e. an empty line (two CRs or two LFs in the
// terminator run) was seen or the scan ran into 'end'.
static bool nextLine(const char input[], int *pos, int end)
{
    int cursor = *pos;

    // run to the first line-break character of the current line
    while (cursor < end && input[cursor] != '\r' && input[cursor] != '\n') {
        ++cursor;
    }

    // swallow line-break characters until one kind has been seen twice
    int crSeen = 0;
    int lfSeen = 0;
    while (cursor < end && crSeen < 2 && lfSeen < 2) {
        const char c = input[cursor];
        if (c == '\r') {
            ++crSeen;
        } else if (c == '\n') {
            ++lfSeen;
        } else {
            break;
        }
        ++cursor;
    }

    // One kind was seen twice and the other only once: if the missing partner
    // comes next, swallow it as well. This way a conforming blank line made of
    // \r\n\r\n (and also \n\r\n\r) is consumed completely.
    const bool oneShort = (crSeen == 2 && lfSeen == 1) || (crSeen == 1 && lfSeen == 2);
    if (cursor < end && oneShort) {
        const char missing = (crSeen == 1) ? '\r' : '\n';
        if (input[cursor] == missing) {
            ++cursor;
        }
    }

    *pos = cursor;
    return cursor < end && crSeen < 2 && lfSeen < 2;
}
00070 
// Case-insensitively match 'term' at *pos.
// On a match, *pos is moved just behind the term and true is returned.
// On a mismatch, *pos is left untouched and false is returned.
// If the term does not fit with at least one character to spare
// (*pos + strlen(term) >= end), *pos is set to 'end' and false is returned.
// Because of that, callers should always try the shortest terms first.
static bool consume(const char input[], int *pos, int end, const char *term)
{
    const int start = *pos;
    const int termLength = static_cast<int>(strlen(term));

    if (start + termLength >= end) {
        *pos = end;
        return false;
    }
    if (strncasecmp(input + start, term, termLength) != 0) {
        return false;
    }
    *pos = start + termLength;
    return true;
}
00088 
00089 
00090 QByteArray TokenIterator::next()
00091 {
00092     QPair<int, int> token = m_tokens[m_currentToken++];
00093     //fromRawData brings some speed advantage but also the requirement to keep the text buffer
00094     //around. this together with implicit sharing (you don't know where copies end up)
00095     //is dangerous!
00096     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00097     return QByteArray(&m_buffer[token.first], token.second - token.first);
00098 }
00099 
00100 QByteArray TokenIterator::current() const
00101 {
00102     QPair<int, int> token = m_tokens[m_currentToken - 1];
00103     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00104     return QByteArray(&m_buffer[token.first], token.second - token.first);
00105 }
00106 
00107 QList<QByteArray> TokenIterator::all() const
00108 {
00109     QList<QByteArray> ret;
00110     for (int i = 0; i < m_tokens.count(); i++) {
00111         QPair<int, int> token = m_tokens[i];
00112         ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
00113     }
00114     return ret;
00115 }
00116 
00117 
00118 HeaderTokenizer::HeaderTokenizer(char *buffer)
00119  : m_buffer(buffer)
00120 {
00121     // add information about available headers and whether they have one or multiple,
00122     // comma-separated values.
00123     
00124     //The following response header fields are from RFC 2616 unless otherwise specified.
00125     //Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
00126     //a header field.
00127     static const HeaderFieldTemplate headerFieldTemplates[] = {
00128         {"accept-ranges", false},
00129         {"cache-control", true},
00130         {"connection", true},
00131         {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
00132         {"content-encoding", true},
00133         {"content-language", true},
00134         {"content-length", false},
00135         {"content-location", false},
00136         {"content-md5", false},
00137         {"content-type", false},
00138         {"date", false},
00139         {"dav", true}, //RFC 2518
00140         {"etag", false},
00141         {"expires", false},
00142         {"keep-alive", false}, //RFC 2068
00143         {"last-modified", false},
00144         {"link", false}, //RFC 2068, multi-valued with ";" separator
00145         {"location", false},
00146         {"p3p", true}, // http://www.w3.org/TR/P3P/
00147         {"pragma", true},
00148         {"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
00149                                        //multiple values. we handle this at a higher level.
00150         {"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
00151                                     //when using "connection" when talking to a proxy.
00152         {"refresh", false}, //not sure, only found some mailing list posts mentioning it
00153         {"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
00154                                //by sending several instances of this field as opposed to
00155                                //usually comma-separated lists with maybe multiple instances.
00156         {"transfer-encoding", true},
00157         {"upgrade", true},
00158         {"warning", true},
00159         {"www-authenticate", false} //see proxy-authenticate
00160     };
00161 
00162     for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
00163         const HeaderFieldTemplate &ft = headerFieldTemplates[i];
00164         insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
00165     }
00166 }
00167 
00168 int HeaderTokenizer::tokenize(int begin, int end)
00169 {
00170     char *buf = m_buffer;  //keep line length in check :/
00171     int idx = begin;
00172     int startIdx = begin; //multi-purpose start of current token
00173     bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
00174     QByteArray headerKey;
00175     do {
00176         
00177         if (buf[idx] == ' ' || buf [idx] == '\t') {
00178             // line continuation; preserve startIdx except (see below)
00179             if (headerKey.isEmpty()) {
00180                 continue;
00181             }
00182             // turn CR/LF into spaces for later parsing convenience
00183             int backIdx = idx - 1;
00184             while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
00185                 buf[backIdx--] = ' ';
00186             }
00187 
00188             // multiple values, comma-separated: add new value or continue previous?
00189             if (operator[](headerKey).isMultiValued) {
00190                 if (multiValuedEndedWithComma) {
00191                     // start new value; this is almost like no line continuation
00192                     skipSpace(buf, &idx, end);
00193                     startIdx = idx;
00194                 } else {
00195                     // continue previous value; this is tricky. unit tests to the rescue!
00196                     if (operator[](headerKey).beginEnd.last().first == startIdx) {
00197                         // remove entry, it will be re-added because already idx != startIdx
00198                         operator[](headerKey).beginEnd.removeLast();
00199                     } else {
00200                         // no comma, no entry: the prev line was whitespace only - start new value
00201                         skipSpace(buf, &idx, end);
00202                         startIdx = idx;
00203                     }
00204                 }
00205             }
00206 
00207         } else {
00208             // new field
00209             startIdx = idx;
00210             // also make sure that there is at least one char after the colon
00211             while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
00212                 buf[idx] = tolower(buf[idx]);
00213                 idx++;
00214             }
00215             if (buf[idx] != ':') {
00216                 //malformed line: no colon
00217                 headerKey.clear();
00218                 continue;
00219             }
00220             headerKey = QByteArray(&buf[startIdx], idx - startIdx);
00221             if (!contains(headerKey)) {
00222                 //we don't recognize this header line
00223                 headerKey.clear();
00224                 continue;
00225             }
00226             // skip colon & leading whitespace
00227             idx++;
00228             skipSpace(buf, &idx, end);
00229             startIdx = idx;
00230         }
00231 
00232         // we have the name/key of the field, now parse the value
00233         if (!operator[](headerKey).isMultiValued) {
00234         
00235             // scan to end of line
00236             while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
00237                 idx++;
00238             }
00239             if (!operator[](headerKey).beginEnd.isEmpty()) {
00240                 // there already is an entry; are we just in a line continuation?
00241                 if (operator[](headerKey).beginEnd.last().first == startIdx) {
00242                     // line continuation: delete previous entry and later insert a new, longer one.
00243                     operator[](headerKey).beginEnd.removeLast();
00244                 }
00245             }
00246             operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
00247             
00248         } else {
00249         
00250             // comma-separated list
00251             while (true) {
00252                 //skip one value
00253                 while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
00254                     idx++;
00255                 }
00256                 if (idx != startIdx) {
00257                     operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));                    
00258                 }
00259                 multiValuedEndedWithComma = buf[idx] == ',';
00260                 //skip comma(s) and leading whitespace, if any respectively
00261                 while (idx < end && buf[idx] == ',') {
00262                     idx++;
00263                 }
00264                 skipSpace(buf, &idx, end);
00265                 //next value or end-of-line / end of header?
00266                 if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
00267                     break;
00268                 }
00269                 //next value
00270                 startIdx = idx;
00271             }
00272         }
00273     } while (nextLine(buf, &idx, end));
00274     return idx;
00275 }
00276 
00277 
00278 TokenIterator HeaderTokenizer::iterator(const char *key)
00279 {
00280     QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
00281     if (contains(keyBa)) {
00282         return TokenIterator(value(keyBa).beginEnd, m_buffer);
00283     } else {
00284         return TokenIterator(m_nullTokens, m_buffer);
00285     }
00286 }

KIOSlave

Skip menu "KIOSlave"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.7
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal