libdap++ Updated for version 3.8.2
|
// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.

// Copyright (c) 1996, California Institute of Technology.
// ALL RIGHTS RESERVED.   U.S. Government Sponsorship acknowledged.
//
// Please read the full copyright notice in the file COPYRIGHT_URI
// in this directory.
00031 // 00032 // Author: Todd Karakashian, NASA/Jet Propulsion Laboratory 00033 // Todd.K.Karakashian@jpl.nasa.gov 00034 // 00035 // $RCSfile: escaping.cc,v $ - Miscellaneous routines for OPeNDAP HDF server 00036 // 00037 // These two routines are for escaping/unescaping strings that are identifiers 00038 // in DAP2 00039 // id2www() -- escape (using WWW hex codes) non-allowable characters in a 00040 // DAP2 identifier 00041 // www2id() -- given an WWW hexcode escaped identifier, restore it 00042 // 00043 // These two routines are for escaping/unescaping strings storing attribute 00044 // values. They use traditional octal escapes (\nnn) because they are 00045 // intended to be viewed by a user 00046 // escattr() -- escape (using traditional octal backslash) non-allowable 00047 // characters in the value of a DAP2 attribute 00048 // unescattr() -- given an octally escaped string, restore it 00049 // 00050 // These are routines used by the above, not intended to be called directly: 00051 // 00052 // hexstring() 00053 // unhexstring() 00054 // octstring() 00055 // unoctstring() 00056 // 00057 // -Todd 00058 00059 #include <ctype.h> 00060 00061 #include <iomanip> 00062 #include <string> 00063 #include <sstream> 00064 00065 #include "GNURegex.h" 00066 #include "Error.h" 00067 #include "InternalErr.h" 00068 // #define DODS_DEBUG 00069 #include "debug.h" 00070 00071 using namespace std; 00072 00073 namespace libdap { 00074 00075 // The next four functions were originally defined static, but I removed that 00076 // to make testing them (see generalUtilTest.cc) easier to write. 
5/7/2001 00077 // jhrg 00078 00079 string 00080 hexstring(unsigned char val) 00081 { 00082 ostringstream buf; 00083 buf << hex << setw(2) << setfill('0') 00084 << static_cast<unsigned int>(val); 00085 00086 return buf.str(); 00087 } 00088 00089 string 00090 unhexstring(string s) 00091 { 00092 int val; 00093 istringstream ss(s); 00094 ss >> hex >> val; 00095 char tmp_str[2]; 00096 tmp_str[0] = static_cast<char>(val); 00097 tmp_str[1] = '\0'; 00098 return string(tmp_str); 00099 } 00100 00101 string 00102 octstring(unsigned char val) 00103 { 00104 ostringstream buf; 00105 buf << oct << setw(3) << setfill('0') 00106 << static_cast<unsigned int>(val); 00107 00108 return buf.str(); 00109 } 00110 00111 string 00112 unoctstring(string s) 00113 { 00114 int val; 00115 00116 istringstream ss(s); 00117 ss >> oct >> val; 00118 00119 DBG(cerr << "unoctstring: " << val << endl); 00120 00121 char tmp_str[2]; 00122 tmp_str[0] = static_cast<char>(val); 00123 tmp_str[1] = '\0'; 00124 return string(tmp_str); 00125 } 00126 00151 string 00152 id2www(string in, const string &allowable) 00153 { 00154 string::size_type i = 0; 00155 00156 while ((i = in.find_first_not_of(allowable, i)) != string::npos) { 00157 in.replace(i, 1, "%" + hexstring(in[i])); 00158 i++; 00159 } 00160 00161 return in; 00162 } 00163 00174 string 00175 id2www_ce(string in, const string &allowable) 00176 { 00177 return id2www(in, allowable); 00178 } 00179 00212 string 00213 www2id(const string &in, const string &escape, const string &except) 00214 { 00215 string::size_type i = 0; 00216 string res = in; 00217 while ((i = res.find_first_of(escape, i)) != string::npos) { 00218 if (except.find(res.substr(i, 3)) != string::npos) { 00219 i += 3; 00220 continue; 00221 } 00222 res.replace(i, 3, unhexstring(res.substr(i + 1, 2))); 00223 } 00224 00225 return res; 00226 } 00227 00228 static string 00229 entity(char c) 00230 { 00231 switch (c) { 00232 case '>': return ">"; 00233 case '<': return "<"; 00234 case '&': return "&"; 
00235 case '\'': return "'"; 00236 case '\"': return """; 00237 default: 00238 throw InternalErr(__FILE__, __LINE__, "Unrecognized character."); 00239 } 00240 } 00241 00248 string 00249 id2xml(string in, const string ¬_allowed) 00250 { 00251 string::size_type i = 0; 00252 00253 while ((i = in.find_first_of(not_allowed, i)) != string::npos) { 00254 in.replace(i, 1, entity(in[i])); 00255 i++; 00256 } 00257 00258 return in; 00259 } 00260 00266 string 00267 xml2id(string in) 00268 { 00269 string::size_type i = 0; 00270 00271 while ((i = in.find(">", i)) != string::npos) 00272 in.replace(i, 4, ">"); 00273 00274 i = 0; 00275 while ((i = in.find("<", i)) != string::npos) 00276 in.replace(i, 4, "<"); 00277 00278 i = 0; 00279 while ((i = in.find("&", i)) != string::npos) 00280 in.replace(i, 5, "&"); 00281 00282 i = 0; 00283 while ((i = in.find("'", i)) != string::npos) 00284 in.replace(i, 6, "'"); 00285 00286 i = 0; 00287 while ((i = in.find(""", i)) != string::npos) 00288 in.replace(i, 6, "\""); 00289 00290 return in; 00291 } 00292 00298 string 00299 esc2underscore(string s) 00300 { 00301 string::size_type pos; 00302 while ((pos = s.find('%')) != string::npos) 00303 s.replace(pos, 3, "_"); 00304 00305 return s; 00306 } 00307 00308 00312 string 00313 escattr(string s) 00314 { 00315 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\""; 00316 const string ESC = "\\"; 00317 const string DOUBLE_ESC = ESC + ESC; 00318 const string QUOTE = "\""; 00319 const string ESCQUOTE = ESC + QUOTE; 00320 00321 // escape non-printing characters with octal escape 00322 string::size_type ind = 0; 00323 while ((ind = s.find_first_not_of(printable, ind)) != s.npos) 00324 s.replace(ind, 1, ESC + octstring(s[ind])); 00325 00326 // escape \ with a second backslash 00327 ind = 0; 00328 while ((ind = s.find(ESC, ind)) != s.npos) { 00329 s.replace(ind, 1, DOUBLE_ESC); 00330 ind += DOUBLE_ESC.length(); 00331 } 00332 00333 // escape 
" with backslash 00334 ind = 0; 00335 while ((ind = s.find(QUOTE, ind)) != s.npos) { 00336 s.replace(ind, 1, ESCQUOTE); 00337 ind += ESCQUOTE.length(); 00338 } 00339 00340 return s; 00341 } 00342 00351 string 00352 unescattr(string s) 00353 { 00354 Regex octal("\\\\[0-3][0-7][0-7]"); // matches 4 characters 00355 Regex esc_quote("\\\\\""); // matches 3 characters 00356 Regex esc_esc("\\\\\\\\"); // matches 2 characters 00357 const string ESC = "\\"; 00358 const string QUOTE = "\""; 00359 int matchlen; 00360 unsigned int index; 00361 00362 DBG(cerr << "0XX" << s << "XXX" << endl); 00363 // unescape any escaped backslashes 00364 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0); 00365 while (index < s.length()) { 00366 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl); 00367 s.replace(index, 2, ESC); 00368 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl); 00369 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0); 00370 } 00371 00372 // unescape any escaped double quote characters 00373 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0); 00374 while (index < s.length()) { 00375 s.replace(index, 2, QUOTE); 00376 DBG(cerr << "2XX" << s << "XXX index: " << index << endl); 00377 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0); 00378 } 00379 00380 // unescape octal characters 00381 index = octal.search(s.c_str(), s.length(), matchlen, 0); 00382 while (index < s.length()) { 00383 s.replace(index, 4, unoctstring(s.substr(index + 1, 3))); 00384 DBG(cerr << "3XX" << s << "XXX index: " << index << endl); 00385 index = octal.search(s.c_str(), s.length(), matchlen, 0); 00386 } 00387 00388 DBG(cerr << "4XX" << s << "XXX" << endl); 00389 return s; 00390 } 00391 00392 string 00393 munge_error_message(string msg) 00394 { 00395 // First, add enclosing quotes if needed. 
00396 if (*msg.begin() != '"') 00397 msg.insert(msg.begin(), '"'); 00398 if (*(msg.end() - 1) != '"') 00399 msg += "\""; 00400 00401 // Now escape any internal double quotes that aren't escaped. 00402 string::iterator miter; 00403 for (miter = msg.begin() + 1; miter != msg.end() - 1; miter++) 00404 if (*miter == '"' && *(miter - 1) != '\\') 00405 miter = msg.insert(miter, '\\'); 00406 00407 return msg; 00408 } 00409 00410 } // namespace libdap 00411