Skip to content

Commit

Permalink
Merge pull request #78 from cicirello/feat-date-only
Browse files Browse the repository at this point in the history
Option to include dates only in the lastmod fields of XML sitemaps
  • Loading branch information
cicirello authored Oct 25, 2022
2 parents 3075bb5 + 367ae96 commit 105142a
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 10 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

### CI/CD

### Dependencies


## [1.9.0] - 2022-10-25

### Added
* Option to include dates only in the lastmod fields of XML sitemaps. Default includes full date-time.

### CI/CD
* Bump Python to 3.11 in CI/CD workflows.

Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,13 @@ for pages where the filename has the `.html` extension. If you prefer to exclude
`.html` extension from the URLs in your sitemap, then
pass `drop-html-extension: true` to the action in your workflow.
Note that you should also ensure that any canonical links that you list within
the html files corresponds to your choice here.
the html files correspond to your choice here.

### `date-only`

The `date-only` input controls whether XML sitemaps include the full date and time in lastmod,
or only the date. The default is `date-only: false`, which includes the full date and time
in the lastmod fields. If you only want the date in the lastmod, then use `date-only: true`.

## Outputs

Expand Down Expand Up @@ -203,7 +209,7 @@ you can also use a specific version such as with:

```yml
- name: Generate the sitemap
uses: cicirello/generate-sitemap@v1.8.5
uses: cicirello/generate-sitemap@v1.9.0
with:
base-url-path: https://THE.URL.TO.YOUR.PAGE/
```
Expand Down
5 changes: 5 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ inputs:
description: 'Enables dropping .html from urls in sitemap.'
required: false
default: false
date-only:
description: 'Pass true to include only the date without the time in XML sitemaps, or false to include the full date and time.'
required: false
default: false
outputs:
sitemap-path:
description: 'The path to the generated sitemap file.'
Expand All @@ -75,3 +79,4 @@ runs:
- ${{ inputs.sitemap-format }}
- ${{ inputs.additional-extensions }}
- ${{ inputs.drop-html-extension }}
- ${{ inputs.date-only }}
29 changes: 21 additions & 8 deletions generatesitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,16 @@ def urlstring(f, baseUrl, dropExtension=False) :
<loc>{0}</loc>
<lastmod>{1}</lastmod>
</url>"""

def xmlSitemapEntry(f, baseUrl, dateString, dropExtension=False) :

def removeTime(dateString) :
    """Removes the time from a date-time, leaving only the date.

    The result is the first 10 characters of dateString, so the
    input is expected to begin with a 10-character date such as
    YYYY-MM-DD (for example, 2020-09-11T13:35:00-04:00 becomes
    2020-09-11). A string of 10 or fewer characters is returned
    unchanged.

    Keyword arguments:
    dateString - The date-time.
    """
    return dateString[:10]

def xmlSitemapEntry(f, baseUrl, dateString, dropExtension=False, dateOnly=False) :
"""Forms a string with an entry formatted for an xml sitemap
including lastmod date.
Expand All @@ -258,7 +266,10 @@ def xmlSitemapEntry(f, baseUrl, dateString, dropExtension=False) :
dateString - lastmod date correctly formatted
dropExtension - true to drop extensions of .html from the filename in urls
"""
return xmlSitemapEntryTemplate.format(urlstring(f, baseUrl, dropExtension), dateString)
return xmlSitemapEntryTemplate.format(
urlstring(f, baseUrl, dropExtension),
removeTime(dateString) if dateOnly else dateString
)

def writeTextSitemap(files, baseUrl, dropExtension=False) :
"""Writes a plain text sitemap to the file sitemap.txt.
Expand All @@ -273,7 +284,7 @@ def writeTextSitemap(files, baseUrl, dropExtension=False) :
sitemap.write(urlstring(f, baseUrl, dropExtension))
sitemap.write("\n")

def writeXmlSitemap(files, baseUrl, dropExtension=False) :
def writeXmlSitemap(files, baseUrl, dropExtension=False, dateOnly=False) :
"""Writes an xml sitemap to the file sitemap.xml.
Keyword Arguments:
Expand All @@ -285,7 +296,7 @@ def writeXmlSitemap(files, baseUrl, dropExtension=False) :
sitemap.write('<?xml version="1.0" encoding="UTF-8"?>\n')
sitemap.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
for f in files :
sitemap.write(xmlSitemapEntry(f, baseUrl, lastmod(f), dropExtension))
sitemap.write(xmlSitemapEntry(f, baseUrl, lastmod(f), dropExtension, dateOnly))
sitemap.write("\n")
sitemap.write('</urlset>\n')

Expand All @@ -310,7 +321,8 @@ def main(
includePDF,
sitemapFormat,
additionalExt,
dropExtension
dropExtension,
dateOnly
) :
"""The main function of the generate-sitemap GitHub Action.
Expand Down Expand Up @@ -340,7 +352,7 @@ def main(
if pathToSitemap[-1] != "/" :
pathToSitemap += "/"
if sitemapFormat == "xml" :
writeXmlSitemap(files, baseUrl, dropExtension)
writeXmlSitemap(files, baseUrl, dropExtension, dateOnly)
pathToSitemap += "sitemap.xml"
else :
writeTextSitemap(files, baseUrl, dropExtension)
Expand All @@ -360,7 +372,8 @@ def main(
includePDF = sys.argv[4].lower() == "true",
sitemapFormat = sys.argv[5],
additionalExt = set(sys.argv[6].lower().replace(",", " ").replace(".", " ").split()),
dropExtension = sys.argv[7].lower() == "true"
dropExtension = sys.argv[7].lower() == "true",
dateOnly = sys.argv[8].lower() == "true"
)


16 changes: 16 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,11 @@ def test_urlstring_drop_html(self) :
self.assertEqual(expected[i%len(expected)], gs.urlstring(f, base1, True))
self.assertEqual(expected[i%len(expected)], gs.urlstring(f, base2, True))

def test_removeTime(self) :
    # A full date-time is reduced to its leading date portion.
    date = "2020-09-11T13:35:00-04:00"
    expected = "2020-09-11"
    self.assertEqual(expected, gs.removeTime(date))
    # A value that is already date-only must pass through unchanged.
    self.assertEqual(expected, gs.removeTime(expected))

def test_xmlSitemapEntry(self) :
base = "https://TESTING.FAKE.WEB.ADDRESS.TESTING/"
f = "./a.html"
Expand All @@ -581,6 +586,17 @@ def test_xmlSitemapEntry(self) :
expected = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a</loc>\n<lastmod>2020-09-11T13:35:00-04:00</lastmod>\n</url>"
self.assertEqual(actual, expected)

def test_xmlSitemapEntryDateOnly(self) :
    # With dateOnly=True the lastmod element must contain only the date,
    # independent of whether the .html extension is kept or dropped.
    base = "https://TESTING.FAKE.WEB.ADDRESS.TESTING/"
    f = "./a.html"
    date = "2020-09-11T13:35:00-04:00"
    # dropExtension=False: the url keeps the .html extension.
    actual = gs.xmlSitemapEntry(f, base, date, False, True)
    expected = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a.html</loc>\n<lastmod>2020-09-11</lastmod>\n</url>"
    self.assertEqual(actual, expected)
    # dropExtension=True: the url omits the .html extension.
    actual = gs.xmlSitemapEntry(f, base, date, True, True)
    expected = "<url>\n<loc>https://TESTING.FAKE.WEB.ADDRESS.TESTING/a</loc>\n<lastmod>2020-09-11</lastmod>\n</url>"
    self.assertEqual(actual, expected)

def test_robotsTxtParser(self) :
expected = [ [],
["/"],
Expand Down

0 comments on commit 105142a

Please sign in to comment.