Skip to content

Commit cf058fe

Browse files
committed
feat: Respect Cache-Control: max-age header
This change implements support for the `Cache-Control: max-age` HTTP header to avoid fetching feeds that have not expired. The `Feed` class now stores `last_checked` and `max_age` attributes. Before fetching a feed, the application checks if the cached version is still valid based on these attributes. If the cache is still fresh, the network request is skipped. After a successful fetch, the `Cache-Control` header is parsed from the response, and the `last_checked` and `max_age` attributes are updated. This ensures that subsequent fetches will respect the cache duration specified by the feed provider. This commit adds a test case to verify that the `Cache-Control: max-age` header is respected. The test starts a webserver that serves a feed with a `max-age` value, runs the feed to populate the cache, and then checks that the feed is not fetched again before the `max-age` expires, and is fetched again after it expires. Fixes: rss2email#286
1 parent 63d657c commit cf058fe

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

rss2email/feed.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ class Feed (object):
171171
'etag',
172172
'modified',
173173
'seen',
174+
'max_age',
175+
'last_checked',
174176
]
175177

176178
## saved/loaded from ConfigParser instance
@@ -352,6 +354,8 @@ def reset(self):
352354
self.etag = None
353355
self.modified = None
354356
self.seen = {} # type: Dict[str, Dict[str, Any]]
357+
self.max_age = None
358+
self.last_checked = None
355359

356360
def _set_name(self, name):
357361
if not self._name_regexp.match(name):
@@ -369,6 +373,13 @@ def _fetch(self):
369373
>>> parsed.status
370374
200
371375
"""
376+
if (self.max_age is not None and self.last_checked is not None and
377+
_time.time() < self.last_checked + self.max_age):
378+
_LOG.info('skipping {}: cache has not expired'.format(self.name))
379+
parsed = _feedparser.FeedParserDict()
380+
parsed['status'] = 304
381+
return parsed
382+
372383
_LOG.info('fetch {}'.format(self))
373384
if not self.url:
374385
raise _error.InvalidFeedConfig(setting='url', feed=self)
@@ -937,6 +948,17 @@ def run(self, send=True, clean=False):
937948
self.modified = None
938949
parsed = self._fetch()
939950

951+
# Refresh the cache bookkeeping for every result that is not a 304.
# A 304 leaves the previously stored last_checked/max_age untouched.
#
# 'status' and 'headers' are looked up with .get() because a result that
# did not come from a successful HTTP exchange may lack them; attribute
# access would then raise AttributeError at this point.
# NOTE(review): assumes `parsed` is dict-like (the FeedParserDict built
# in _fetch is) and that header names are lower-cased -- confirm.
if parsed.get('status') != 304:
    self.last_checked = _time.time()
    cache_control = (parsed.get('headers') or {}).get('cache-control') or ''
    # Directive names are case-insensitive and the value may be quoted.
    # The lookbehind keeps the pattern from matching inside a longer
    # directive name.
    match = _re.search(
        r'(?<![\w-])max-age="?(\d+)', cache_control, _re.IGNORECASE)
    # No usable max-age directive: forget any previously stored value so
    # the next run fetches unconditionally.
    self.max_age = int(match.group(1)) if match else None
961+
940962
if clean and len(parsed.entries) > 0:
941963
for guid in self.seen:
942964
self.seen[guid]['old'] = True

test/test_main.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,24 @@ def webserver_for_test_if_fetch(queue, timeout):
249249
finally:
250250
httpd.server_close()
251251

252+
def webserver_for_test_cache_control(queue, max_age):
    """Serve the disqus sample feed with a ``Cache-Control: max-age`` header.

    The port the server is bound to is put on ``queue`` first.  After
    that, every message read from ``queue`` other than ``"stop"`` makes
    the server handle exactly one request; ``"stop"`` ends the loop and
    closes the server.

    queue   -- queue shared with the caller: carries the port out and
               the control messages in
    max_age -- value advertised in the ``max-age`` directive
    """
    class CacheControlHandler(NoLogHandler):
        def do_GET(self):
            # Every GET receives the same feed, whatever path was asked for.
            self.send_response(200)
            self.send_header('Cache-Control', 'max-age={}'.format(max_age))
            self.end_headers()
            feed_path = _os.path.join(test_dir, 'disqus/feed.rss')
            with open(feed_path, 'rb') as feed_file:
                body = feed_file.read()
                self.wfile.write(body)

    # Binding to port 0 leaves the choice of port to the OS; the actual
    # port is read back from the server and reported through the queue.
    httpd = http.server.HTTPServer(('', 0), CacheControlHandler)
    try:
        queue.put(httpd.server_address[1])
        # iter() with a sentinel keeps calling queue.get() until it
        # returns "stop"; each other message allows one request.
        for _message in iter(queue.get, "stop"):
            httpd.handle_request()
    finally:
        httpd.server_close()
269+
252270
class TestFetch(unittest.TestCase):
253271
"Retrieving feeds from servers"
254272
def test_delay(self):
@@ -379,6 +397,39 @@ def test_only_new(self):
379397
self.assertIn("seen", content["feeds"][0])
380398
self.assertEqual(queue.get(), "done")
381399

400+
def test_cache_control(self):
    "Respects Cache-Control: max-age header"
    max_age = 5
    cfg = """[DEFAULT]
to = example@example.com"""

    queue = multiprocessing.Queue()
    webserver_proc = multiprocessing.Process(target=webserver_for_test_cache_control, args=(queue, max_age))
    webserver_proc.start()
    try:
        # Bounded wait: if the server never reports its port, fail with
        # queue.Empty instead of blocking the test run forever.
        port = queue.get(timeout=10)

        with ExecContext(cfg) as ctx:
            ctx.call("add", 'test', 'http://127.0.0.1:{port}/disqus/feed.rss'.format(port = port))

            # First run, should fetch
            queue.put("next")
            p = ctx.call("run", "--no-send")
            self.assertIn("fetch", p.stderr)
            self.assertNotIn("cache has not expired", p.stderr)

            # Second run, should be cached (no "next": the server must
            # not be contacted at all)
            p = ctx.call("run", "--no-send")
            self.assertIn("cache has not expired", p.stderr)

            # Wait for cache to expire
            time.sleep(max_age + 1)

            # Third run, should fetch again
            queue.put("next")
            p = ctx.call("run", "--no-send")
            self.assertIn("fetch", p.stderr)
            self.assertNotIn("cache has not expired", p.stderr)
    finally:
        # Always shut the server down, even when an assertion failed.
        # Otherwise the child stays blocked and, not being a daemon
        # process, is waited for at interpreter exit.
        queue.put("stop")
        webserver_proc.join(timeout=5)
        if webserver_proc.is_alive():
            # The server may be stuck waiting for a request that never
            # came, in which case it will never read "stop".
            webserver_proc.terminate()
            webserver_proc.join()
432+
382433

383434
def webserver_for_test_send(queue):
384435
httpd = http.server.HTTPServer(('', 0), NoLogHandler)

0 commit comments

Comments
 (0)