def check_request_headers(headers_obj, black_keys=('spider', 'bot', 'python')):
    """
    Validate request headers — inspect the User-Agent so crawler/bot
    requests can be filtered out (e.g. excluded from page-view counting).

    @param headers_obj: the request.headers object (dict-like; Django's
                        HttpHeaders is case-insensitive, so 'user-agent' works)
    @param black_keys: iterable of lowercase substrings that mark a UA as a
                       bot; defaults to common search-engine spiders and
                       Python HTTP clients
    @return: False when the User-Agent is missing/empty or contains any
             blacklisted key; True otherwise
    use: flag = check_request_headers(request.headers)
    """
    user_agent = headers_obj.get('user-agent')
    # No (or empty) User-Agent: treat the request as a bot and reject it.
    if not user_agent:
        return False
    user_agent = str(user_agent).lower()
    for key in black_keys:
        if key in user_agent:
            # Log the offending UA so blocked crawler traffic can be audited.
            logger.warning(f'Bot/Spider request user-agent:{user_agent}')
            return False
    return True
obj.update_views() + else: + t = time.time() - is_read_time + if t > 60 * 30: + obj.update_views() + request.session[cache_key] = time.time() + else: + obj.update_views() # ******* 浏览量增加的逻辑 ******* return result diff --git a/apps/blog/views.py b/apps/blog/views.py index 8dac2819c..718029505 100644 --- a/apps/blog/views.py +++ b/apps/blog/views.py @@ -21,7 +21,12 @@ from markdown.extensions.toc import TocExtension # 锚点的拓展 from .models import Article, Tag, Category, Timeline, Silian, AboutBlog, FriendLink, Subject -from .utils import site_full_url, CustomHtmlFormatter, ApiResponse, ErrorApiResponse, add_views +from .utils import (site_full_url, + CustomHtmlFormatter, + ApiResponse, + ErrorApiResponse, + add_views, + check_request_headers) # Create your views here. @@ -84,21 +89,22 @@ def get_object(self, queryset=None): obj = super().get_object() # 设置浏览量增加时间判断,同一篇文章两次浏览超过半小时才重新统计阅览量,作者浏览忽略 u = self.request.user - ses = self.request.session - the_key = self.context_object_name + ':read:{}'.format(obj.id) - is_read_time = ses.get(the_key) - if u == obj.author or u.is_superuser: - pass - else: - if not is_read_time: - obj.update_views() - ses[the_key] = time.time() + if check_request_headers(self.request.headers): # 请求头校验通过才计算阅读量 + ses = self.request.session + the_key = self.context_object_name + ':read:{}'.format(obj.id) + is_read_time = ses.get(the_key) + if u == obj.author or u.is_superuser: + pass else: - now_time = time.time() - t = now_time - is_read_time - if t > 60 * 30: + if not is_read_time: obj.update_views() ses[the_key] = time.time() + else: + now_time = time.time() + t = now_time - is_read_time + if t > 60 * 30: + obj.update_views() + ses[the_key] = time.time() # 获取文章更新的时间,判断是否从缓存中取文章的markdown,可以避免每次都转换 ud = obj.update_date.strftime("%Y%m%d%H%M%S") md_key = self.context_object_name + ':markdown:{}:{}'.format(obj.id, ud)