Source code for requests_cache.backends.filesystem

"""
.. image::
    ../_static/files-generic.png

This backend stores responses in files on the local filesystem (one file per response).

File Formats
^^^^^^^^^^^^
By default, responses are saved as pickle files, since this format is generally the fastest. If you
want to save responses in a human-readable format, you can use one of the other available
:ref:`serializers`. For example, to save responses as JSON files:

    >>> session = CachedSession('~/http_cache', backend='filesystem', serializer='json')
    >>> session.get('https://httpbin.org/get')
    >>> print(list(session.cache.paths()))
    ['/home/user/http_cache/4dc151d95200ec.json']

Or as YAML (requires ``pyyaml``):

    >>> session = CachedSession('~/http_cache', backend='filesystem', serializer='yaml')
    >>> session.get('https://httpbin.org/get')
    >>> print(list(session.cache.paths()))
    ['/home/user/http_cache/4dc151d95200ec.yaml']

Cache Files
^^^^^^^^^^^
* See :ref:`files` for general info on specifying cache paths
* The path for a given response will be in the format ``<cache_name>/<cache_key><extension>``
* Redirects are stored in a separate SQLite database, located at ``<cache_name>/redirects.sqlite``
* Use :py:meth:`.FileCache.paths` to get a list of all cached response paths

API Reference
^^^^^^^^^^^^^
.. automodsumm:: requests_cache.backends.filesystem
   :classes-only:
   :nosignatures:
"""
from contextlib import contextmanager
from os import makedirs
from pathlib import Path
from pickle import PickleError
from shutil import rmtree
from typing import Iterator

from ..serializers import SERIALIZERS
from . import BaseCache, BaseStorage
from .sqlite import AnyPath, SQLiteDict, get_cache_path


class FileCache(BaseCache):
    """Filesystem cache backend.

    Responses are kept in a :py:class:`FileDict` (one file per response), while redirects are
    kept in a :py:class:`SQLiteDict` backed by ``redirects.sqlite`` inside the same directory.

    Args:
        cache_name: Base directory for cache files
        use_cache_dir: Store cache files in a user cache directory (e.g., ``~/.cache/``)
        use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
            Note: if ``cache_name`` is an absolute path, this option will be ignored.
        extension: Extension for cache files. If not specified, the serializer default extension
            will be used.
    """

    def __init__(self, cache_name: AnyPath = 'http_cache', use_temp: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.responses: FileDict = FileDict(cache_name, use_temp=use_temp, **kwargs)
        # The redirects database lives next to the response files, so its location is derived
        # from the directory that the response storage resolved.
        redirects_db = self.cache_dir / 'redirects.sqlite'
        self.redirects: SQLiteDict = SQLiteDict(redirects_db, 'redirects', **kwargs)

    @property
    def cache_dir(self) -> Path:
        """Directory that holds the cached response files"""
        responses_dir = self.responses.cache_dir
        return Path(responses_dir)

    def paths(self) -> Iterator[Path]:
        """Get the file paths of all cached responses, as reported by the response storage"""
        response_paths = self.responses.paths()
        return response_paths

    def clear(self):
        """Remove everything from the cache"""
        # Order matters: clearing the responses deletes the whole cache directory (and with it
        # redirects.sqlite), so the redirects database has to be re-initialized afterwards.
        self.responses.clear()
        self.redirects.init_db()
class FileDict(BaseStorage):
    """A dictionary-like interface to files on the local filesystem.

    Each item is stored as one file named ``<key><extension>`` inside ``cache_dir``. Values are
    converted to and from file contents with ``self.serializer`` (expected to be provided by
    ``BaseStorage`` -- not visible from this class).

    Args:
        cache_name: Base directory for cache files; resolved with ``get_cache_path``
        use_temp: Forwarded to ``get_cache_path``
        use_cache_dir: Forwarded to ``get_cache_path``
        extension: Extension for cache files. If not specified, the serializer default extension
            will be used.
        kwargs: Additional keyword arguments forwarded to ``BaseStorage``
    """

    def __init__(
        self,
        cache_name: AnyPath,
        use_temp: bool = False,
        use_cache_dir: bool = False,
        extension: str = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.cache_dir = get_cache_path(cache_name, use_cache_dir=use_cache_dir, use_temp=use_temp)
        self.extension = _get_extension(extension, self.serializer)
        # Files are assumed to be text until a read or write shows otherwise
        self.is_binary = False
        makedirs(self.cache_dir, exist_ok=True)

    @contextmanager
    def _try_io(self, ignore_errors: bool = False):
        """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors.

        Args:
            ignore_errors: If True, swallow I/O and pickle errors instead of raising

        Raises:
            KeyError: If the wrapped operation raises an ``OSError`` or ``PickleError`` and
                ``ignore_errors`` is False. The original error is attached as the cause.
        """
        try:
            yield
        except (IOError, OSError, PickleError) as e:
            if not ignore_errors:
                raise KeyError(e) from e

    def _path(self, key) -> Path:
        """Get the file path used to store the given key"""
        return self.cache_dir / f'{key}{self.extension}'

    def __getitem__(self, key):
        """Read and deserialize the file for ``key``.

        The file is first read in the current mode (text unless binary content has been seen
        before). If that fails with a ``UnicodeDecodeError``, the storage switches to binary mode
        and retries once.

        Raises:
            KeyError: If the file can't be opened or read, or a ``PickleError`` is raised
            UnicodeDecodeError: If the contents still can't be decoded in binary mode
        """
        with self._try_io():
            while True:
                mode = 'rb' if self.is_binary else 'r'
                try:
                    with self._path(key).open(mode) as f:
                        return self.serializer.loads(f.read())
                except UnicodeDecodeError:
                    # Already in binary mode: retrying can't help, so propagate instead of
                    # looping forever on an undecodable file
                    if self.is_binary:
                        raise
                    self.is_binary = True

    def __delitem__(self, key):
        """Delete the file for ``key``; raises ``KeyError`` if it can't be removed"""
        with self._try_io():
            self._path(key).unlink()

    def __setitem__(self, key, value):
        """Serialize ``value`` and write it to the file for ``key``.

        Raises:
            KeyError: If the file can't be opened or written
        """
        serialized_value = self.serializer.dumps(value)
        if isinstance(serialized_value, bytes):
            self.is_binary = True
        mode = 'wb' if self.is_binary else 'w'
        with self._try_io():
            with self._path(key).open(mode) as f:
                # Write the value serialized above rather than serializing it a second time
                f.write(serialized_value)

    def __iter__(self):
        yield from self.keys()

    def __len__(self):
        return sum(1 for _ in self.paths())

    def clear(self):
        """Delete the cache directory and everything in it, then recreate it empty.

        Errors are ignored (best-effort).
        """
        with self._try_io(ignore_errors=True):
            rmtree(self.cache_dir, ignore_errors=True)
            # Same call as in __init__; tolerates a directory that could not be fully removed
            makedirs(self.cache_dir, exist_ok=True)

    def keys(self):
        """Get all keys, derived from the file names (minus the final suffix) of cached files"""
        return [path.stem for path in self.paths()]

    def paths(self) -> Iterator[Path]:
        """Get file paths to all cached responses (files in ``cache_dir`` matching the extension)"""
        return self.cache_dir.glob(f'*{self.extension}')
def _get_extension(extension: str = None, serializer=None) -> str: """Use either the provided file extension, or get the serializer's default extension""" if extension: return f'.{extension}' for name, obj in SERIALIZERS.items(): if serializer is obj: return '.' + name.replace('pickle', 'pkl') return ''