libdap++ Updated for version 3.8.2

HTTPCache.cc

Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 //#define DODS_DEBUG
00029 //#define DODS_DEBUG2
00030 
00031 #include <pthread.h>
00032 #include <limits.h>
00033 #include <unistd.h>   // for stat
00034 #include <sys/types.h>  // for stat and mkdir
00035 #include <sys/stat.h>
00036 
00037 #include <cstring>
00038 #include <iostream>
00039 #include <sstream>
00040 #include <algorithm>
00041 #include <iterator>
00042 #include <set>
00043 
00044 #include "Error.h"
00045 #include "InternalErr.h"
00046 #include "ResponseTooBigErr.h"
00047 #ifndef WIN32
00048 #include "SignalHandler.h"
00049 #endif
00050 #include "HTTPCacheInterruptHandler.h"
00051 #include "HTTPCacheTable.h"
00052 #include "HTTPCache.h"
00053 
00054 #include "util_mit.h"
00055 #include "debug.h"
00056 
00057 using namespace std;
00058 
00059 namespace libdap {
00060 
00061 HTTPCache *HTTPCache::_instance = 0;
00062 
// instance_mutex serializes the body of HTTPCache::instance() so that, no
// matter how many threads call instance(), only one HTTPCache object is ever
// made. The mutex is initialized by the static function once_init_routine(),
// which is run through pthread_once() with once_block as the once-control
// variable; that guarantees the initialization happens exactly one time.
static pthread_mutex_t instance_mutex;
static pthread_once_t once_block = PTHREAD_ONCE_INIT;

// Thin portability wrappers for the few file system calls whose names or
// signatures differ between Windows and POSIX systems.
#ifdef WIN32
#include <direct.h>
#include <time.h>
#include <fcntl.h>
#define MKDIR(a,b) _mkdir((a))
#define UMASK(a) _umask((a))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
#define DIR_SEPARATOR_CHAR '\\'
#define DIR_SEPARATOR_STR "\\"
#else
#define MKDIR(a,b) mkdir((a), (b))
#define UMASK(a) umask((a))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) mkstemp((a))
#define DIR_SEPARATOR_CHAR '/'
#define DIR_SEPARATOR_STR "/"
#endif

// Default location of the cache (used when neither the caller nor the
// environment names one) and the names of the bookkeeping files kept in it.
#ifdef WIN32
#define CACHE_LOC "\\tmp\\"
#define CACHE_ROOT "dods-cache\\"
#else
#define CACHE_LOC "/tmp/"
#define CACHE_ROOT "dods-cache/"
#endif
#define CACHE_INDEX ".index"
#define CACHE_LOCK ".lock"
#define CACHE_META ".meta"
//#define CACHE_EMPTY_ETAG "@cache@"

// Parenthesized so the macro expands to a single value inside any
// surrounding expression (e.g. x % NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours, in seconds

#define DUMP_FREQUENCY 10 // Dump index every x loads

#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
00114 
00115 static void
00116 once_init_routine()
00117 {
00118     int status;
00119     status = INIT(&instance_mutex);
00120 
00121     if (status != 0)
00122         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00123 }
00124 
00153 HTTPCache *
00154 HTTPCache::instance(const string &cache_root, bool force)
00155 {
00156     LOCK(&instance_mutex);
00157     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
00158         << "... ");
00159 
00160     try {
00161         if (!_instance) {
00162             _instance = new HTTPCache(cache_root, force);
00163 
00164             DBG(cerr << "New instance: " << _instance << ", cache root: "
00165                 << _instance->d_cache_root << endl);
00166 
00167             atexit(delete_instance);
00168 
00169 #ifndef WIN32
00170             // Register the interrupt handler. If we've already registered
00171             // one, barf. If this becomes a problem, hack SignalHandler so
00172             // that we can chain these handlers... 02/10/04 jhrg
00173             //
00174             // Technically we're leaking memory here. However, since this
00175             // class is a singleton, we know that only three objects will
00176             // ever be created and they will all exist until the process
00177             // exits. We can let this slide... 02/12/04 jhrg
00178             EventHandler *old_eh = SignalHandler::instance()->register_handler
00179                                    (SIGINT, new HTTPCacheInterruptHandler);
00180             if (old_eh) {
00181                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00182                 throw SignalHandlerRegisteredErr(
00183                     "Could not register event handler for SIGINT without superseding an existing one.");
00184             }
00185 
00186             old_eh = SignalHandler::instance()->register_handler
00187                      (SIGPIPE, new HTTPCacheInterruptHandler);
00188             if (old_eh) {
00189                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00190                 throw SignalHandlerRegisteredErr(
00191                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00192             }
00193 
00194             old_eh = SignalHandler::instance()->register_handler
00195                      (SIGTERM, new HTTPCacheInterruptHandler);
00196             if (old_eh) {
00197                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00198                 throw SignalHandlerRegisteredErr(
00199                     "Could not register event handler for SIGTERM without superseding an existing one.");
00200             }
00201 #endif
00202         }
00203     }
00204     catch (...) {
00205         DBG2(cerr << "The constructor threw an Error!" << endl);
00206         UNLOCK(&instance_mutex);
00207         throw;
00208     }
00209 
00210     UNLOCK(&instance_mutex);
00211     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00212 
00213     return _instance;
00214 }
00215 
00219 void
00220 HTTPCache::delete_instance()
00221 {
00222     DBG(cerr << "Entering delete_instance()..." << endl);
00223     if (HTTPCache::_instance) {
00224         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00225         delete HTTPCache::_instance;
00226         HTTPCache::_instance = 0;
00227     }
00228 
00229     DBG(cerr << "Exiting delete_instance()" << endl);
00230 }
00231 
00246 HTTPCache::HTTPCache(string cache_root, bool force) :
00247         d_locked_open_file(0),
00248         d_cache_enabled(false),
00249         d_cache_protected(false),
00250         d_expire_ignored(false),
00251         d_always_validate(false),
00252         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00253         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
00254         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
00255         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00256         d_default_expiration(NO_LM_EXPIRATION),
00257         d_max_age(-1),
00258         d_max_stale(-1),
00259         d_min_fresh(-1),
00260         d_http_cache_table(0)
00261 {
00262     DBG(cerr << "Entering the constructor for " << this << "... ");
00263 
00264         int status = pthread_once(&once_block, once_init_routine);
00265         if (status != 0)
00266                 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00267 
00268         INIT(&d_cache_mutex);
00269 
00270         // This used to throw an Error object if we could not get the
00271         // single user lock. However, that results in an invalid object. It's
00272         // better to have an instance that has default values. If we cannot get
00273         // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00274         //
00275         // I fixed this block so that the cache root is set before we try to get
00276         // the single user lock. That was the fix for bug #661. To make that
00277         // work, I had to move the call to create_cache_root out of
00278         // set_cache_root(). 09/08/03 jhrg
00279 
00280         set_cache_root(cache_root);
00281         int block_size;
00282 
00283         if (!get_single_user_lock(force))
00284                 throw Error("Could not get single user lock for the cache");
00285 
00286 #ifdef WIN32
00287         //  Windows is unable to provide us this information.  4096 appears
00288         //  a best guess.  It is likely to be in the range [2048, 8192] on
00289         //  windows, but will the level of truth of that statement vary over
00290         //  time ?
00291         block_size = 4096;
00292 #else
00293         struct stat s;
00294         if (stat(cache_root.c_str(), &s) == 0)
00295                 block_size = s.st_blksize;
00296         else
00297                 throw Error("Could not set file system block size.");
00298 #endif
00299         d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
00300         d_cache_enabled = true;
00301 
00302         DBGN(cerr << "exiting" << endl);
00303 }
00304 
00317 HTTPCache::~HTTPCache()
00318 {
00319     DBG(cerr << "Entering the destructor for " << this << "... ");
00320 
00321     try {
00322         if (startGC())
00323             perform_garbage_collection();
00324 
00325         d_http_cache_table->cache_index_write();
00326     }
00327     catch (Error &e) {
00328         // If the cache index cannot be written, we've got problems. However,
00329         // unless we're debugging, still free up the cache table in memory.
00330         // How should we let users know they cache index is not being
00331         // written?? 10/03/02 jhrg
00332         DBG(cerr << e.get_error_message() << endl);
00333     }
00334 
00335     delete d_http_cache_table;
00336 
00337     release_single_user_lock();
00338 
00339     DBGN(cerr << "exiting destructor." << endl);
00340     DESTROY(&d_cache_mutex);
00341 }
00342 
00343 
00347 
00351 bool
00352 HTTPCache::stopGC() const
00353 {
00354     return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
00355 }
00356 
00363 bool
00364 HTTPCache::startGC() const
00365 {
00366     DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
00367     return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
00368 }
00369 
00384 void
00385 HTTPCache::perform_garbage_collection()
00386 {
00387     DBG(cerr << "Performing garbage collection" << endl);
00388 
00389     // Remove all the expired responses.
00390     expired_gc();
00391 
00392     // Remove entries larger than max_entry_size.
00393     too_big_gc();
00394 
00395     // Remove entries starting with zero hits, 1, ..., until stopGC()
00396     // returns true.
00397     hits_gc();
00398 }
00399 
00405 void
00406 HTTPCache::expired_gc()
00407 {
00408     if (!d_expire_ignored) {
00409         d_http_cache_table->delete_expired_entries();
00410     }
00411 }
00412 
00429 void
00430 HTTPCache::hits_gc()
00431 {
00432     int hits = 0;
00433 
00434     if (startGC()) {
00435                 while (!stopGC()) {
00436                         d_http_cache_table->delete_by_hits(hits);
00437                         hits++;
00438                 }
00439         }
00440 }
00441 
00446 void HTTPCache::too_big_gc() {
00447         if (startGC())
00448                 d_http_cache_table->delete_by_size(d_max_entry_size);
00449 }
00450 
00452 
00463 bool HTTPCache::get_single_user_lock(bool force) {
00464         if (!d_locked_open_file) {
00465                 FILE * fp = NULL;
00466 
00467                 try {
00468                         // It's OK to call create_cache_root if the directory already
00469                         // exists.
00470                         create_cache_root(d_cache_root);
00471                 }
00472                 catch (Error &e) {
00473                         // We need to catch and return false because this method is
00474                         // called from a ctor and throwing at this point will result in a
00475                         // partially constructed object. 01/22/04 jhrg
00476                         return false;
00477                 }
00478 
00479                 // Try to read the lock file. If we can open for reading, it exists.
00480                 string lock = d_cache_root + CACHE_LOCK;
00481                 if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00482                         int res = fclose(fp);
00483                         if (res) {
00484                                 DBG(cerr << "Failed to close " << (void *)fp << endl);
00485                         }
00486                         if (force)
00487                                 REMOVE(lock.c_str());
00488                         else
00489                                 return false;
00490                 }
00491 
00492                 if ((fp = fopen(lock.c_str(), "w")) == NULL)
00493                         return false;
00494 
00495                 d_locked_open_file = fp;
00496                 return true;
00497         }
00498 
00499         return false;
00500 }
00501 
00504 void
00505 HTTPCache::release_single_user_lock()
00506 {
00507     if (d_locked_open_file) {
00508         int res = fclose(d_locked_open_file);
00509         if (res) {
00510             DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
00511         }
00512         d_locked_open_file = 0;
00513     }
00514 
00515     string lock = d_cache_root + CACHE_LOCK;
00516     REMOVE(lock.c_str());
00517 }
00518 
00521 
00525 string
00526 HTTPCache::get_cache_root() const
00527 {
00528     return d_cache_root;
00529 }
00530 
00531 
00540 void
00541 HTTPCache::create_cache_root(const string &cache_root)
00542 {
00543     struct stat stat_info;
00544     string::size_type cur = 0;
00545 
00546 #ifdef WIN32
00547     cur = cache_root[1] == ':' ? 3 : 1;
00548 #else
00549     cur = 1;
00550 #endif
00551     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00552         string dir = cache_root.substr(0, cur);
00553         if (stat(dir.c_str(), &stat_info) == -1) {
00554             DBG2(cerr << "Cache....... Creating " << dir << endl);
00555             mode_t mask = UMASK(0);
00556             if (MKDIR(dir.c_str(), 0777) < 0) {
00557                 DBG2(cerr << "Error: can't create." << endl);
00558                 UMASK(mask);
00559                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00560             }
00561             UMASK(mask);
00562         }
00563         else {
00564             DBG2(cerr << "Cache....... Found " << dir << endl);
00565         }
00566         cur++;
00567     }
00568 }
00569 
00584 void
00585 HTTPCache::set_cache_root(const string &root)
00586 {
00587     if (root != "") {
00588         d_cache_root = root;
00589         // cache root should end in /.
00590         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00591             d_cache_root += DIR_SEPARATOR_CHAR;
00592     }
00593     else {
00594         // If no cache root has been indicated then look for a suitable
00595         // location.
00596         char * cr = (char *) getenv("DODS_CACHE");
00597         if (!cr) cr = (char *) getenv("TMP");
00598         if (!cr) cr = (char *) getenv("TEMP");
00599         if (!cr) cr = CACHE_LOC;
00600 
00601         d_cache_root = cr;
00602         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00603             d_cache_root += DIR_SEPARATOR_CHAR;
00604 
00605         d_cache_root += CACHE_ROOT;
00606     }
00607 
00608     // Test d_hhtp_cache_table because this method can be called before that
00609     // instance is created and also can be called later to cahnge the cache
00610     // root. jhrg 05.14.08
00611     if (d_http_cache_table)
00612         d_http_cache_table->set_cache_root(d_cache_root);
00613 }
00614 
00626 void
00627 HTTPCache::set_cache_enabled(bool mode)
00628 {
00629     lock_cache_interface();
00630 
00631     d_cache_enabled = mode;
00632 
00633     unlock_cache_interface();
00634 }
00635 
00638 bool
00639 HTTPCache::is_cache_enabled() const
00640 {
00641     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
00642          << endl);
00643     return d_cache_enabled;
00644 }
00645 
00656 void
00657 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
00658 {
00659     lock_cache_interface();
00660 
00661     d_cache_disconnected = mode;
00662 
00663     unlock_cache_interface();
00664 }
00665 
00668 CacheDisconnectedMode
00669 HTTPCache::get_cache_disconnected() const
00670 {
00671     return d_cache_disconnected;
00672 }
00673 
00682 void
00683 HTTPCache::set_expire_ignored(bool mode)
00684 {
00685     lock_cache_interface();
00686 
00687     d_expire_ignored = mode;
00688 
00689     unlock_cache_interface();
00690 }
00691 
00692 /* Is the cache ignoring Expires headers returned with responses that have
00693    been cached? */
00694 
00695 bool
00696 HTTPCache::is_expire_ignored() const
00697 {
00698     return d_expire_ignored;
00699 }
00700 
00716 void
00717 HTTPCache::set_max_size(unsigned long size)
00718 {
00719     lock_cache_interface();
00720 
00721     try {
00722         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
00723                                  MIN_CACHE_TOTAL_SIZE * MEGA :
00724                                  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
00725         unsigned long old_size = d_total_size;
00726         d_total_size = new_size;
00727         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
00728         d_gc_buffer = d_total_size / CACHE_GC_PCT;
00729 
00730         if (new_size < old_size && startGC()) {
00731             perform_garbage_collection();
00732             d_http_cache_table->cache_index_write();
00733         }
00734     }
00735     catch (...) {
00736         unlock_cache_interface();
00737         DBGN(cerr << "Unlocking interface." << endl);
00738         throw;
00739     }
00740 
00741     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
00742          << " with " << d_folder_size
00743          << " bytes for meta information and folders and at least "
00744          << d_gc_buffer << " bytes free after every gc" << endl);
00745 
00746     unlock_cache_interface();
00747 }
00748 
00751 unsigned long
00752 HTTPCache::get_max_size() const
00753 {
00754     return d_total_size / MEGA;
00755 }
00756 
00765 void
00766 HTTPCache::set_max_entry_size(unsigned long size)
00767 {
00768     lock_cache_interface();
00769 
00770     try {
00771         unsigned long new_size = size * MEGA;
00772         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
00773             unsigned long old_size = d_max_entry_size;
00774             d_max_entry_size = new_size;
00775             if (new_size < old_size && startGC()) {
00776                 perform_garbage_collection();
00777                 d_http_cache_table->cache_index_write();
00778             }
00779         }
00780     }
00781     catch (...) {
00782         unlock_cache_interface();
00783         throw;
00784     }
00785 
00786     DBG2(cerr << "Cache...... Max entry cache size is "
00787          << d_max_entry_size << endl);
00788 
00789     unlock_cache_interface();
00790 }
00791 
00796 unsigned long
00797 HTTPCache::get_max_entry_size() const
00798 {
00799     return d_max_entry_size / MEGA;
00800 }
00801 
00812 void
00813 HTTPCache::set_default_expiration(const int exp_time)
00814 {
00815     lock_cache_interface();
00816 
00817     d_default_expiration = exp_time;
00818 
00819     unlock_cache_interface();
00820 }
00821 
00824 int
00825 HTTPCache::get_default_expiration() const
00826 {
00827     return d_default_expiration;
00828 }
00829 
00834 void
00835 HTTPCache::set_always_validate(bool validate)
00836 {
00837     d_always_validate = validate;
00838 }
00839 
00843 bool
00844 HTTPCache::get_always_validate() const
00845 {
00846     return d_always_validate;
00847 }
00848 
00865 void
00866 HTTPCache::set_cache_control(const vector<string> &cc)
00867 {
00868     lock_cache_interface();
00869 
00870     try {
00871         d_cache_control = cc;
00872 
00873         vector<string>::const_iterator i;
00874         for (i = cc.begin(); i != cc.end(); ++i) {
00875             string header = (*i).substr(0, (*i).find(':'));
00876             string value = (*i).substr((*i).find(": ") + 2);
00877             if (header != "Cache-Control") {
00878                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
00879             }
00880             else {
00881                 if (value == "no-cache" || value == "no-store")
00882                     d_cache_enabled = false;
00883                 else if (value.find("max-age") != string::npos) {
00884                     string max_age = value.substr(value.find("=" + 1));
00885                     d_max_age = parse_time(max_age.c_str());
00886                 }
00887                 else if (value == "max-stale")
00888                     d_max_stale = 0; // indicates will take anything;
00889                 else if (value.find("max-stale") != string::npos) {
00890                     string max_stale = value.substr(value.find("=" + 1));
00891                     d_max_stale = parse_time(max_stale.c_str());
00892                 }
00893                 else if (value.find("min-fresh") != string::npos) {
00894                     string min_fresh = value.substr(value.find("=" + 1));
00895                     d_min_fresh = parse_time(min_fresh.c_str());
00896                 }
00897             }
00898         }
00899     }
00900     catch (...) {
00901         unlock_cache_interface();
00902         throw;
00903     }
00904 
00905     unlock_cache_interface();
00906 }
00907 
00908 
00913 vector<string>
00914 HTTPCache::get_cache_control()
00915 {
00916     return d_cache_control;
00917 }
00918 
00920 
00929 bool
00930 HTTPCache::is_url_in_cache(const string &url)
00931 {
00932     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
00933 
00934     HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
00935     bool status = entry != 0;
00936     if (entry) {
00937 #if 0
00938         entry->unlock();
00939 #endif
00940         entry->unlock_read_response();
00941     }
00942     return  status;
00943 }
00944 
/** Is the header a hop-by-hop header? Those are Connection, Keep-Alive,
    Proxy-Authenticate, Proxy-Authorization, Transfer-Encoding and Upgrade.

    Only the field name (the text before the first ':') is examined, and it
    is compared without regard to case, so a header whose \e value happens
    to contain one of those words is not mistaken for a hop-by-hop header.

    @param header A complete header line, e.g. "Connection: close".
    @return True if the header's field name is one of the names above. */
bool
is_hop_by_hop_header(const string &header)
{
    static const char *const hop_by_hop[] = {
        "connection", "keep-alive", "proxy-authenticate",
        "proxy-authorization", "transfer-encoding", "upgrade"
    };

    // Isolate the field name and trim any blanks around it.
    string name = header.substr(0, header.find(':'));
    const string::size_type first = name.find_first_not_of(" \t");
    if (first == string::npos)
        return false;
    name = name.substr(first, name.find_last_not_of(" \t") - first + 1);

    // Field names are ASCII tokens; fold them by hand so the result does
    // not depend on the current locale.
    for (string::size_type k = 0; k < name.size(); ++k) {
        if (name[k] >= 'A' && name[k] <= 'Z')
            name[k] = static_cast<char>(name[k] - 'A' + 'a');
    }

    const string::size_type count = sizeof(hop_by_hop) / sizeof(hop_by_hop[0]);
    for (string::size_type k = 0; k < count; ++k) {
        if (name == hop_by_hop[k])
            return true;
    }

    return false;
}
00960 
00972 void
00973 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
00974 {
00975     string fname = cachename + CACHE_META;
00976     d_open_files.push_back(fname);
00977 
00978     FILE *dest = fopen(fname.c_str(), "w");
00979     if (!dest) {
00980         throw InternalErr(__FILE__, __LINE__,
00981                           "Could not open named cache entry file.");
00982     }
00983 
00984     vector<string>::const_iterator i;
00985     for (i = headers.begin(); i != headers.end(); ++i) {
00986         if (!is_hop_by_hop_header(*i)) {
00987             fwrite((*i).c_str(), (*i).size(), 1, dest);
00988             fwrite("\n", 1, 1, dest);
00989         }
00990     }
00991 
00992     int res = fclose(dest);
00993     if (res) {
00994         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
00995             << dest << endl);
00996     }
00997 
00998     d_open_files.pop_back();
00999 }
01000 
01011 void
01012 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01013 {
01014     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01015     if (!md) {
01016         throw InternalErr(__FILE__, __LINE__,
01017                           "Could not open named cache entry meta data file.");
01018     }
01019 
01020     char line[1024];
01021     while (!feof(md) && fgets(line, 1024, md)) {
01022         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
01023         headers.push_back(string(line));
01024     }
01025 
01026     int res = fclose(md);
01027     if (res) {
01028         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01029             << md << endl);
01030     }
01031 }
01032 
01054 int
01055 HTTPCache::write_body(const string &cachename, const FILE *src)
01056 {
01057     d_open_files.push_back(cachename);
01058 
01059     FILE *dest = fopen(cachename.c_str(), "wb");
01060     if (!dest) {
01061         throw InternalErr(__FILE__, __LINE__,
01062                           "Could not open named cache entry file.");
01063     }
01064 
01065     // Read and write in 1k blocks; an attempt at doing this efficiently.
01066     // 09/30/02 jhrg
01067     char line[1024];
01068     size_t n;
01069     int total = 0;
01070     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01071         total += fwrite(line, 1, n, dest);
01072         DBG2(sleep(3));
01073     }
01074 
01075     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01076         int res = fclose(dest);
01077         res = res & unlink(cachename.c_str());
01078         if (res) {
01079             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01080                 << dest << endl);
01081         }
01082         throw InternalErr(__FILE__, __LINE__,
01083                           "I/O error transferring data to the cache.");
01084     }
01085 
01086     rewind(const_cast<FILE *>(src));
01087 
01088     int res = fclose(dest);
01089     if (res) {
01090         DBG(cerr << "HTTPCache::write_body - Failed to close "
01091             << dest << endl);
01092     }
01093 
01094     d_open_files.pop_back();
01095 
01096     return total;
01097 }
01098 
01107 FILE *
01108 HTTPCache::open_body(const string &cachename)
01109 {
01110         FILE *src = fopen(cachename.c_str(), "rb");             // Read only
01111         if (!src)
01112         throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
01113 
01114     return src;
01115 }
01116 
01142 bool
01143 HTTPCache::cache_response(const string &url, time_t request_time,
01144                           const vector<string> &headers, const FILE *body)
01145 {
01146     lock_cache_interface();
01147 
01148     DBG(cerr << "Caching url: " << url << "." << endl);
01149 
01150     try {
01151         // If this is not an http or https URL, don't cache.
01152         if (url.find("http:") == string::npos &&
01153             url.find("https:") == string::npos) {
01154             unlock_cache_interface();
01155             return false;
01156         }
01157 
01158         // This does nothing if url is not already in the cache. It's
01159         // more efficient to do this than to first check and see if the entry
01160         // exists. 10/10/02 jhrg
01161         d_http_cache_table->remove_entry_from_cache_table(url);
01162 
01163         HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
01164         entry->lock_write_response();
01165 
01166         try {
01167             d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
01168             if (entry->is_no_cache()) {
01169                 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
01170                     << "(" << url << ")" << endl);
01171                 entry->unlock_write_response();
01172                 delete entry; entry = 0;
01173                 unlock_cache_interface();
01174                 return false;
01175             }
01176 
01177             // corrected_initial_age, freshness_lifetime, response_time.
01178             d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01179 
01180             d_http_cache_table->create_location(entry); // cachename, cache_body_fd
01181             // move these write function to cache table
01182             entry->set_size(write_body(entry->get_cachename(), body));
01183             write_metadata(entry->get_cachename(), headers);
01184             d_http_cache_table->add_entry_to_cache_table(entry);
01185             entry->unlock_write_response();
01186         }
01187         catch (ResponseTooBigErr &e) {
01188             // Oops. Bummer. Clean up and exit.
01189             DBG(cerr << e.get_error_message() << endl);
01190             REMOVE(entry->get_cachename().c_str());
01191             REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
01192             DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
01193                 << ")" << endl);
01194             entry->unlock_write_response();
01195             delete entry; entry = 0;
01196             unlock_cache_interface();
01197             return false;
01198         }
01199 
01200         if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
01201             if (startGC())
01202                 perform_garbage_collection();
01203 
01204             d_http_cache_table->cache_index_write(); // resets new_entries
01205         }
01206     }
01207     catch (...) {
01208         unlock_cache_interface();
01209         throw;
01210     }
01211 
01212     unlock_cache_interface();
01213 
01214     return true;
01215 }
01216 
01235 vector<string>
01236 HTTPCache::get_conditional_request_headers(const string &url)
01237 {
01238     lock_cache_interface();
01239 
01240     HTTPCacheTable::CacheEntry *entry = 0;
01241     vector<string> headers;
01242 
01243     DBG(cerr << "Getting conditional request headers for " << url << endl);
01244 
01245     try {
01246         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01247         if (!entry)
01248             throw Error("There is no cache entry for the URL: " + url);
01249 
01250         if (entry->get_etag() != "")
01251             headers.push_back(string("If-None-Match: ") + entry->get_etag());
01252 
01253         if (entry->get_lm() > 0) {
01254                 time_t lm = entry->get_lm();
01255             headers.push_back(string("If-Modified-Since: ")
01256                               + date_time_str(&lm));
01257         }
01258         else if (entry->get_max_age() > 0) {
01259                 time_t max_age = entry->get_max_age();
01260             headers.push_back(string("If-Modified-Since: ")
01261                               + date_time_str(&max_age));
01262         }
01263         else if (entry->get_expires() > 0) {
01264                 time_t expires = entry->get_expires();
01265             headers.push_back(string("If-Modified-Since: ")
01266                               + date_time_str(&expires));
01267         }
01268 #if 0
01269                 entry->unlock();
01270 #endif
01271                 entry->unlock_read_response();
01272             unlock_cache_interface();
01273     }
01274     catch (...) {
01275                 unlock_cache_interface();
01276                 if (entry) {
01277 #if 0
01278                     entry->unlock();
01279 #endif
01280                     entry->unlock_read_response();
01281                 }
01282                 throw;
01283         }
01284 
01285     return headers;
01286 }
01287 
/** Strict weak ordering for HTTP header lines that looks only at the header
    name, i.e. the text before the first ':'. A line without a ':' is
    compared using its whole text. Two lines with the same name compare
    equivalent regardless of their values, so a set ordered with this
    functor holds at most one line per header name.

    The comparison is case-sensitive.

    This used to derive from std::binary_function, which was deprecated in
    C++11 and removed in C++17. The nested typedefs that base class supplied
    are declared directly so any code relying on them keeps working. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // find() returns npos when there is no ':'; compare() clamps the
        // length to the end of the string, so the whole line is used then.
        // Comparing in place avoids building two temporary substrings.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
01297 
01311 void
01312 HTTPCache::update_response(const string &url, time_t request_time,
01313                            const vector<string> &headers)
01314 {
01315     lock_cache_interface();
01316 
01317     HTTPCacheTable::CacheEntry *entry = 0;
01318     DBG(cerr << "Updating the response headers for: " << url << endl);
01319 
01320     try {
01321         entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
01322         if (!entry)
01323             throw Error("There is no cache entry for the URL: " + url);
01324 
01325         // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
01326         d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
01327 
01328         // Update corrected_initial_age, freshness_lifetime, response_time.
01329         d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01330 
01331         // Merge the new headers with those in the persistent store. How:
01332         // Load the new headers into a set, then merge the old headers. Since
01333         // set<> ignores duplicates, old headers with the same name as a new
01334         // header will got into the bit bucket. Define a special compare
01335         // functor to make sure that headers are compared using only their
01336         // name and not their value too.
01337         set<string, HeaderLess> merged_headers;
01338 
01339         // Load in the new headers
01340         copy(headers.begin(), headers.end(),
01341              inserter(merged_headers, merged_headers.begin()));
01342 
01343         // Get the old headers and load them in.
01344         vector<string> old_headers;
01345         read_metadata(entry->get_cachename(), old_headers);
01346         copy(old_headers.begin(), old_headers.end(),
01347              inserter(merged_headers, merged_headers.begin()));
01348 
01349         // Read the values back out. Use reverse iterators with back_inserter
01350         // to preserve header order. NB: vector<> does not support push_front
01351         // so we can't use front_inserter(). 01/09/03 jhrg
01352         vector<string> result;
01353         copy(merged_headers.rbegin(), merged_headers.rend(),
01354              back_inserter(result));
01355 
01356         write_metadata(entry->get_cachename(), result);
01357 #if 0
01358         entry->unlock();
01359 #endif
01360         entry->unlock_write_response();
01361                 unlock_cache_interface();
01362     }
01363     catch (...) {
01364         if (entry) {
01365 #if 0
01366                 entry->unlock();
01367 #endif
01368                 entry->unlock_read_response();
01369         }
01370         unlock_cache_interface();
01371         throw;
01372     }
01373 }
01374 
01386 bool
01387 HTTPCache::is_url_valid(const string &url)
01388 {
01389     lock_cache_interface();
01390 
01391     bool freshness;
01392     HTTPCacheTable::CacheEntry *entry = 0;
01393 
01394     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
01395 
01396     try {
01397         if (d_always_validate) {
01398             unlock_cache_interface();
01399             return false;  // force re-validation.
01400         }
01401 
01402         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01403         if (!entry)
01404             throw Error("There is no cache entry for the URL: " + url);
01405 
01406         // If we supported range requests, we'd need code here to check if
01407         // there was only a partial response in the cache. 10/02/02 jhrg
01408 
01409         // In case this entry is of type "must-revalidate" then we consider it
01410         // invalid.
01411         if (entry->get_must_revalidate()) {
01412 #if 0
01413             entry->unlock();
01414 #endif
01415             entry->unlock_read_response();
01416             unlock_cache_interface();
01417             return false;
01418         }
01419 
01420         time_t resident_time = time(NULL) - entry->get_response_time();
01421         time_t current_age = entry->get_corrected_initial_age() + resident_time;
01422 
01423         // Check that the max-age, max-stale, and min-fresh directives
01424         // given in the request cache control header is followed.
01425         if (d_max_age >= 0 && current_age > d_max_age) {
01426             DBG(cerr << "Cache....... Max-age validation" << endl);
01427 #if 0
01428             entry->unlock();
01429 #endif
01430             entry->unlock_read_response();
01431             unlock_cache_interface();
01432             return false;
01433         }
01434         if (d_min_fresh >= 0
01435             && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
01436             DBG(cerr << "Cache....... Min-fresh validation" << endl);
01437 #if 0
01438             entry->unlock();
01439 #endif
01440             entry->unlock_read_response();
01441             unlock_cache_interface();
01442             return false;
01443         }
01444 
01445         freshness = (entry->get_freshness_lifetime()
01446                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
01447 #if 0
01448         entry->unlock();
01449 #endif
01450         entry->unlock_read_response();
01451         unlock_cache_interface();
01452     }
01453     catch (...) {
01454         if (entry) {
01455 #if 0
01456                 entry->unlock();
01457 #endif
01458                 entry->unlock_read_response();
01459         }
01460         unlock_cache_interface();
01461         throw;
01462     }
01463 
01464     return freshness;
01465 }
01466 
01494 FILE * HTTPCache::get_cached_response(const string &url,
01495                 vector<string> &headers, string &cacheName) {
01496     lock_cache_interface();
01497 
01498     FILE *body;
01499     HTTPCacheTable::CacheEntry *entry = 0;
01500 
01501     DBG(cerr << "Getting the cached response for " << url << endl);
01502 
01503     try {
01504         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01505         if (!entry) {
01506                 unlock_cache_interface();
01507                 return 0;
01508         }
01509 
01510         cacheName = entry->get_cachename();
01511         read_metadata(entry->get_cachename(), headers);
01512 
01513         DBG(cerr << "Headers just read from cache: " << endl);
01514         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
01515 
01516         body = open_body(entry->get_cachename());
01517 
01518         DBG(cerr << "Returning: " << url << " from the cache." << endl);
01519 
01520         d_http_cache_table->bind_entry_to_data(entry, body);
01521     }
01522     catch (...) {
01523         if (entry)
01524 #if 0
01525         entry->unlock();
01526 #endif
01527         unlock_cache_interface();
01528         throw;
01529     }
01530 
01531     unlock_cache_interface();
01532 
01533     return body;
01534 }
01546 FILE *
01547 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
01548 {
01549         string discard_name;
01550         return get_cached_response(url, headers, discard_name);
01551 }
01552 
01563 FILE *
01564 HTTPCache::get_cached_response(const string &url)
01565 {
01566         string discard_name;
01567         vector<string> discard_headers;
01568         return get_cached_response(url, discard_headers, discard_name);
01569 }
01570 
01583 void
01584 HTTPCache::release_cached_response(FILE *body)
01585 {
01586     lock_cache_interface();
01587 
01588     try {
01589         d_http_cache_table->uncouple_entry_from_data(body);
01590     }
01591     catch (...) {
01592         unlock_cache_interface();
01593         throw;
01594     }
01595 
01596     unlock_cache_interface();
01597 }
01598 
01611 void
01612 HTTPCache::purge_cache()
01613 {
01614     lock_cache_interface();
01615 
01616     try {
01617         if (d_http_cache_table->is_locked_read_responses())
01618             throw Error("Attempt to purge the cache with entries in use.");
01619 
01620         d_http_cache_table->delete_all_entries();
01621     }
01622     catch (...) {
01623         unlock_cache_interface();
01624         throw;
01625     }
01626 
01627     unlock_cache_interface();
01628 }
01629 
01630 } // namespace libdap