Remove LevelDB support #4112

Merged 1 commit on Oct 31, 2019
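
For projects that had HTTPCACHE_STORAGE pointed at the LevelDB backend, the built-in filesystem backend is the natural replacement (it was already the default, so projects that never set HTTPCACHE_STORAGE are unaffected). A minimal settings sketch; the directory and expiry values are illustrative, not mandated by this PR:

# settings.py -- only HTTPCACHE_STORAGE needs to change to switch backends;
# the other values are shown with their usual defaults for context.
HTTPCACHE_ENABLED = True
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
HTTPCACHE_DIR = 'httpcache'        # resolved under the project data dir
HTTPCACHE_EXPIRATION_SECS = 0      # 0 means cached entries never expire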
71 changes: 0 additions & 71 deletions scrapy/extensions/httpcache.py
@@ -347,77 +347,6 @@ def _read_meta(self, spider, request):
            return pickle.load(f)


class LeveldbCacheStorage(object):

    def __init__(self, settings):
        warn("The LevelDB storage backend is deprecated.",
             ScrapyDeprecationWarning, stacklevel=2)
        import leveldb
        self._leveldb = leveldb
        self.cachedir = data_path(settings['HTTPCACHE_DIR'], createdir=True)
        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
        self.db = None

    def open_spider(self, spider):
        dbpath = os.path.join(self.cachedir, '%s.leveldb' % spider.name)
        self.db = self._leveldb.LevelDB(dbpath)

        logger.debug("Using LevelDB cache storage in %(cachepath)s" % {'cachepath': dbpath}, extra={'spider': spider})

    def close_spider(self, spider):
        # Do compaction each time to save space and also recreate files to
        # avoid them being removed in storages with timestamp-based autoremoval.
        self.db.CompactRange()
        del self.db
        garbage_collect()

    def retrieve_response(self, spider, request):
        data = self._read_data(spider, request)
        if data is None:
            return  # not cached
        url = data['url']
        status = data['status']
        headers = Headers(data['headers'])
        body = data['body']
        respcls = responsetypes.from_args(headers=headers, url=url)
        response = respcls(url=url, headers=headers, status=status, body=body)
        return response

    def store_response(self, spider, request, response):
        key = self._request_key(request)
        data = {
            'status': response.status,
            'url': response.url,
            'headers': dict(response.headers),
            'body': response.body,
        }
        batch = self._leveldb.WriteBatch()
        batch.Put(key + b'_data', pickle.dumps(data, protocol=2))
        batch.Put(key + b'_time', to_bytes(str(time())))
        self.db.Write(batch)

    def _read_data(self, spider, request):
        key = self._request_key(request)
        try:
            ts = self.db.Get(key + b'_time')
        except KeyError:
            return  # not found or invalid entry

        if 0 < self.expiration_secs < time() - float(ts):
            return  # expired

        try:
            data = self.db.Get(key + b'_data')
        except KeyError:
            return  # invalid entry
        else:
            return pickle.loads(data)

    def _request_key(self, request):
        return to_bytes(request_fingerprint(request))



def parse_cachecontrol(header):
"""Parse Cache-Control header

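Removing the class does not rule out LevelDB caching altogether: HTTPCACHE_STORAGE accepts any import path to a class providing the same four methods the deleted code defines (open_spider, close_spider, retrieve_response, store_response), so the backend can live on as an external package. A sketch of such a port, assuming the maintained plyvel binding and a hypothetical myproject.httpcache module, neither of which this PR prescribes:

# myproject/httpcache.py -- hypothetical module; a vendored port of the
# removed backend to the plyvel binding (an assumption, not part of this PR).
import os
import pickle
from time import time

import plyvel  # pip install plyvel

from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.utils.project import data_path
from scrapy.utils.python import to_bytes
from scrapy.utils.request import request_fingerprint


class PlyvelCacheStorage(object):
    """Implements the four-method interface Scrapy expects of a cache storage."""

    def __init__(self, settings):
        self.cachedir = data_path(settings['HTTPCACHE_DIR'], createdir=True)
        self.expiration_secs = settings.getint('HTTPCACHE_EXPIRATION_SECS')
        self.db = None

    def open_spider(self, spider):
        dbpath = os.path.join(self.cachedir, '%s.leveldb' % spider.name)
        self.db = plyvel.DB(dbpath, create_if_missing=True)

    def close_spider(self, spider):
        self.db.close()

    def retrieve_response(self, spider, request):
        key = self._request_key(request)
        ts = self.db.get(key + b'_time')
        if ts is None:
            return  # not cached
        if 0 < self.expiration_secs < time() - float(ts):
            return  # expired
        raw = self.db.get(key + b'_data')
        if raw is None:
            return  # invalid entry
        data = pickle.loads(raw)
        headers = Headers(data['headers'])
        respcls = responsetypes.from_args(headers=headers, url=data['url'])
        return respcls(url=data['url'], headers=headers,
                       status=data['status'], body=data['body'])

    def store_response(self, spider, request, response):
        key = self._request_key(request)
        data = {
            'status': response.status,
            'url': response.url,
            'headers': dict(response.headers),
            'body': response.body,
        }
        # write_batch() gives the same atomicity the old WriteBatch provided
        with self.db.write_batch() as batch:
            batch.put(key + b'_data', pickle.dumps(data, protocol=2))
            batch.put(key + b'_time', to_bytes(str(time())))

    def _request_key(self, request):
        return to_bytes(request_fingerprint(request))

Wiring it up would then be a one-line setting: HTTPCACHE_STORAGE = 'myproject.httpcache.PlyvelCacheStorage'.
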
1 change: 0 additions & 1 deletion tests/requirements-py3.txt
@@ -1,6 +1,5 @@
# Tests requirements
jmespath
leveldb; sys_platform != "win32"
pytest
pytest-cov
pytest-twisted
9 changes: 0 additions & 9 deletions tests/test_downloadermiddleware_httpcache.py
@@ -156,15 +156,6 @@ def _get_settings(self, **new_settings):
        return super(FilesystemStorageTest, self)._get_settings(**new_settings)


class LeveldbStorageTest(DefaultStorageTest):

    try:
        pytest.importorskip('leveldb')
    except SystemError:
        pytestmark = pytest.mark.skip("Test module skipped - 'SystemError: bad call flags' occurs on Python >= 3.8")
    storage_class = 'scrapy.extensions.httpcache.LeveldbCacheStorage'


class DummyPolicyTest(_BaseTest):

    policy_class = 'scrapy.extensions.httpcache.DummyPolicy'
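
The deleted test class also shows the pattern for exercising any storage backend: subclass DefaultStorageTest and point storage_class at the implementation under test. A hypothetical test for the external backend sketched above, mirroring the deleted LeveldbStorageTest:

# Hypothetical test module for an externally maintained backend; the
# module paths are assumptions, not something this PR establishes.
import pytest

from tests.test_downloadermiddleware_httpcache import DefaultStorageTest


class PlyvelStorageTest(DefaultStorageTest):

    pytest.importorskip('plyvel')  # skip when the binding is not installed
    storage_class = 'myproject.httpcache.PlyvelCacheStorage'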