django sitemap分页及适配百度、神马、搜狗搜索引擎的方法
本文主要讲一下sitemap分页时如何自定义URL格式。
Django Sitemap 中的URL样式为:
https://www.soogor.com/sitemap-article.xml
https://www.soogor.com/sitemap-article.xml?p=2
像“https://www.soogor.com/sitemap-article.xml?p=2”这样的格式在百度、神马等搜索引擎是提交不了的,因为它们要求sitemap地址必须以.xml结尾。
所以我们就要改变一下URL,把分页地址改成“https://www.soogor.com/sitemap-article_2.xml”这样以.xml结尾的格式。
在之前做sitemap时,我们引入了from django.contrib.sitemaps import views as sitemaps_views ,所以我们就查看一下这里的views,位于
venv>Lib>site-packages>django>contrib>sitemaps>views.py
下面我们把这里的视图代码复制出来进行修改(不要直接改动site-packages中的文件)。
我们在项目配置文件夹下新建sgsitemap.py文件(也就是settings.py所在的文件夹)
sgsitemap.py
import datetime
from functools import wraps
from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.http import http_date
def x_robots_tag(func):
    """Decorate a view so its response carries an ``X-Robots-Tag`` header.

    The wrapped view is called with the same arguments; the header
    ``X-Robots-Tag: noindex, noodp, noarchive`` is then set on the response
    it returns, and that response is handed back to the caller.
    """

    @wraps(func)
    def inner(request, *args, **kwargs):
        response = func(request, *args, **kwargs)
        response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return response

    return inner
@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """Render the sitemap index with ``.xml``-terminated URLs for every page.

    For each section in ``sitemaps`` the first page is linked under the plain
    section name, and every further page N is linked under the section name
    ``"<section>_<N>"`` instead of a ``?p=N`` query string.

    Args:
        request: The current request; supplies the default scheme and site.
        sitemaps: Mapping of section name to a sitemap class or instance.
        template_name: Template used to render the index.
        content_type: Content type of the response.
        sitemap_url_name: URL name reversed (with a ``section`` kwarg) to
            build each sitemap link.

    Returns:
        A ``TemplateResponse`` whose context key ``sitemaps`` is the list of
        absolute sitemap URLs.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    sites = []  # all sections' sitemap URLs
    for section, site in sitemaps.items():
        # For each section label, add links of all pages of its sitemap
        # (usually generated by the `sitemap` view).
        if callable(site):
            site = site()
        protocol = req_protocol if site.protocol is None else site.protocol

        # Page 1 keeps the bare section name; pages 2..num_pages get a
        # "_<page>" suffix.  The suffix is always appended to the untouched
        # section key, so keys that themselves contain "_" stay intact.
        page_sections = [section]
        page_sections.extend(
            f"{section}_{page}"
            for page in range(2, site.paginator.num_pages + 1)
        )
        for page_section in page_sections:
            sitemap_url = reverse(
                sitemap_url_name, kwargs={"section": page_section}
            )
            sites.append(
                "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
            )

    return TemplateResponse(
        request, template_name, {"sitemaps": sites}, content_type=content_type
    )
@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """Render one page of a sitemap, taking the page number from the URL.

    The page defaults to the ``p`` query parameter (or 1).  When ``section``
    is not itself a key of ``sitemaps`` and has the form ``"<name>_<digits>"``,
    it is split at the last underscore into the real section name and the
    page number.

    Args:
        request: The current request; supplies scheme, site and ``?p=``.
        sitemaps: Mapping of section name to a sitemap class or instance.
        section: Section name, optionally suffixed with ``_<page>``; ``None``
            renders all sections.
        template_name: Template used to render the URL set.
        content_type: Content type of the response.

    Returns:
        A ``TemplateResponse`` whose context key ``urlset`` is the list of
        URLs; a ``Last-Modified`` header is set when every sitemap rendered
        reports a ``latest_lastmod``.

    Raises:
        Http404: If the section is unknown, or the page is empty or not an
            integer.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    page = request.GET.get("p", 1)
    # Split the custom "<section>_<page>" form.  A literal key match wins, and
    # only a trailing all-digit part is treated as a page, so section names
    # containing "_" are not broken.  ``section`` may be None (the default).
    if section is not None and section not in sitemaps:
        base, sep, suffix = section.rpartition("_")
        if sep and suffix.isdecimal():
            section, page = base, suffix

    if section is not None:
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]
    else:
        maps = sitemaps.values()

    lastmod = None
    all_sites_lastmod = True
    urls = []
    for site in maps:
        try:
            if callable(site):
                site = site()
            urls.extend(
                site.get_urls(page=page, site=req_site, protocol=req_protocol)
            )
            if all_sites_lastmod:
                site_lastmod = getattr(site, "latest_lastmod", None)
                if site_lastmod is not None:
                    # Normalise dates to datetimes, and naive values to
                    # aware UTC, so they can be compared with max().
                    if not isinstance(site_lastmod, datetime.datetime):
                        site_lastmod = datetime.datetime.combine(
                            site_lastmod, datetime.time.min
                        )
                    if timezone.is_naive(site_lastmod):
                        # The stdlib UTC object is used because the
                        # ``django.utils.timezone.utc`` alias is gone in
                        # Django 5.0.
                        site_lastmod = timezone.make_aware(
                            site_lastmod, datetime.timezone.utc
                        )
                    lastmod = (
                        site_lastmod
                        if lastmod is None
                        else max(lastmod, site_lastmod)
                    )
                else:
                    all_sites_lastmod = False
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)

    response = TemplateResponse(
        request, template_name, {"urlset": urls}, content_type=content_type
    )
    if all_sites_lastmod and lastmod is not None:
        # if lastmod is defined for all sites, set header so as
        # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
        response.headers["Last-Modified"] = http_date(lastmod.timestamp())
    return response
然后我们再修改urls.py
urls.py
from . import sgsitemap  # the customised sitemap views created above

urlpatterns += [
    # Sitemap index: served by sgsitemap.index, which builds the paginated
    # links by reversing the 'sitemaps' route declared below.
    path(
        'sitemap.xml',
        cache_page(86400)(sgsitemap.index),
        kwargs={'sitemaps': sitemaps, 'sitemap_url_name': 'sitemaps'},
    ),
    # Individual sitemap pages; <section> carries the section name and,
    # for later pages, the "_<page>" suffix.
    path(
        'sitemap-<section>.xml',
        cache_page(86400)(sgsitemap.sitemap),
        kwargs={'sitemaps': sitemaps},
        name='sitemaps',
    ),
]
THE END