diff --git a/README.md b/README.md index 541acfd..97deb35 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ See PardallMarkdown in action and learn how to use it by following this video: Add dependency and application into your `mix.exs`: ```elixir defp deps do -[{:pardall_markdown, "~> 0.3.2"} ...] +[{:pardall_markdown, "~> 0.3.3"} ...] end def application do @@ -208,7 +208,7 @@ The following configuration properties are available (all optional): - `title`: the post title. If not provided, a title will be generated from the post slug. - `date`: the date or date-time to be considered for the post, string, ISO format. If not provided, the file modification date will be considered as the post date. - `published`: a post without `published: true` set will be considered draft. The default can be inverted when the configuration `:is_content_draft_by_default` is set to `false`, this way, posts will always be considered as published, unless they contain: `published: false`. -- `summary`: post description or short content. +- `summary`: post description or short content. If `summary` is not provided, a summary will be generated from the Post's content/body. - `position`: if the post topmost taxonomy has a `:sort_by` rule set to `:position`, this is the value that will be used to sort the post (see below). - `slug`: override the post slug. As seen above, by default, slugs are generated from the file names and are the main, unique identifier of posts. - If you override the slug with this property, make sure to put the full path, prepended by a slash, example: `slug: "/my/custom/slug"`. 
diff --git a/lib/pardall_markdown/content/html_utils.ex b/lib/pardall_markdown/content/html_utils.ex index 5a560b9..e123e4c 100644 --- a/lib/pardall_markdown/content/html_utils.ex +++ b/lib/pardall_markdown/content/html_utils.ex @@ -1,6 +1,63 @@ defmodule PardallMarkdown.Content.HtmlUtils do alias PardallMarkdown.Content.Utils + def generate_summary_from_html(html, expected_length \\ 157) + def generate_summary_from_html(html, _) when html == nil or html == "", do: nil + + @doc """ + Extract text from paragraphs `

` of a HTML `html` string, + and assemble a string up until it reaches `expected_length` length. + + If the generated string length matches `expected_length`, an ellipsis + will be appended to it. If the generated string is smaller than `expected_length`, + then no ellipsis is added. + + If no text could be extracted from the input html, returns nil. + + ## Examples + + iex> PardallMarkdown.Content.HtmlUtils.generate_summary_from_html("

Post Title

So, a description will be generated from it. Even a nested span.

As you can see, this a long paragraph outside.

This is an anchor.") + "So, a description will be generated from it. Even a nested span. As you can see, this a long paragraph outside." + + iex> PardallMarkdown.Content.HtmlUtils.generate_summary_from_html("

Post Title

So, a description will be generated from it. Even a nested span.

Another paragraph?

Another paragraph 2?

Another paragraph 3?

As you can see, this a very long paragraph. As you can see, this a very long paragraph.

") + "So, a description will be generated from it. Even a nested span. Another paragraph? Another paragraph 2? Another paragraph 3? As you can see, this a very long..." + """ + def generate_summary_from_html(html, expected_length) do + document = Floki.parse_fragment!(html) + + Floki.find(document, "p") + |> Enum.reduce("", fn + {"p", _, children}, "" -> + truncate(String.trim(children |> Floki.text()), expected_length) + + {"p", _, children}, final -> + if String.length(final) < expected_length do + truncate(final <> " " <> String.trim(children |> Floki.text()), expected_length) + else + final + end + + _, final -> final + end) + |> trim_and_maybe_ellipsis(expected_length) + end + + defp truncate(string, length) do + if String.length(string) <= length do + string + else + String.slice(string, 0..length) + end + end + + defp trim_and_maybe_ellipsis(string, _) + when string == "" or is_nil(string), do: nil + defp trim_and_maybe_ellipsis(string, expected_length) do + string = String.trim(string) + if String.length(string) < expected_length, + do: string, else: string <> "..." + end + def convert_internal_links_to_live_links(html) do {updated_tree, _} = Floki.parse_fragment!(html) diff --git a/lib/pardall_markdown/file_parser.ex b/lib/pardall_markdown/file_parser.ex index 772e7cf..7a22213 100644 --- a/lib/pardall_markdown/file_parser.ex +++ b/lib/pardall_markdown/file_parser.ex @@ -55,14 +55,14 @@ defmodule PardallMarkdown.FileParser do with {:ok, raw_content} <- File.read(path), {:ok, attrs, body} <- parse_contents(path, raw_content, is_index?), {:ok, body_html, _} <- markdown_to_html(body), - {:ok, summary_html, _} <- maybe_summary_to_html(attrs), + {:ok, summary} <- get_summary(attrs, body_html), {:ok, date} <- parse_or_get_date(attrs, path) do attrs = attrs |> maybe_extract_and_put_slug(path) |> extract_and_put_categories(path) |> maybe_put_title(path, is_index?) 
- |> Map.put(:summary, summary_html) + |> Map.put(:summary, summary) |> Map.put(:date, date) |> Map.put(:is_index, is_index?) @@ -106,10 +106,10 @@ defmodule PardallMarkdown.FileParser do ]) end - defp maybe_summary_to_html(%{summary: summary}) when is_binary(summary) and summary != "", - do: summary |> markdown_to_html() + defp get_summary(%{summary: summary}, _) when is_binary(summary) and summary != "", + do: {:ok, summary} - defp maybe_summary_to_html(_), do: {:ok, nil, :ignore} + defp get_summary(_, body_html), do: {:ok, generate_summary_from_html(body_html)} defp markdown_to_html(content), do: content |> Earmark.as_html(escape: false) diff --git a/lib/pardall_markdown/repository.ex b/lib/pardall_markdown/repository.ex index d446258..67586d8 100644 --- a/lib/pardall_markdown/repository.ex +++ b/lib/pardall_markdown/repository.ex @@ -108,7 +108,7 @@ defmodule PardallMarkdown.Repository do if slug not found. """ def get_by_slug!(slug) do - get_by_slug(slug) || raise PardallMarkdown.Content.NotFoundError, "Page not found: #{slug}" + get_by_slug(slug) || raise PardallMarkdown.Content.NotFoundError, "Post not found: #{slug}" end def push_post(path, %{slug: slug, is_index: is_index?} = attrs, content, _type \\ :post) do diff --git a/mix.exs b/mix.exs index b4ffc84..20838b8 100644 --- a/mix.exs +++ b/mix.exs @@ -2,7 +2,7 @@ defmodule PardallMarkdown.MixProject do use Mix.Project @url "https://github.com/alfredbaudisch/pardall_markdown" - @version "0.3.2" + @version "0.3.3" def project do [ diff --git a/test/content/blog/dailies/3d/blender/default-cube-not-deleted.md b/test/content/blog/dailies/3d/blender/default-cube-not-deleted.md index 2924c7e..bec4a54 100644 --- a/test/content/blog/dailies/3d/blender/default-cube-not-deleted.md +++ b/test/content/blog/dailies/3d/blender/default-cube-not-deleted.md @@ -5,4 +5,4 @@ } --- -Do not delete Blender's Default Cube! \ No newline at end of file +Do not delete the Default Cube! 
\ No newline at end of file diff --git a/test/content/blog/dailies/first-day.md b/test/content/blog/dailies/first-day.md index 30adb35..8eecce2 100644 --- a/test/content/blog/dailies/first-day.md +++ b/test/content/blog/dailies/first-day.md @@ -1,7 +1,8 @@ %{ title: "This is the beginning of the project PardallMarkdown!", date: "2020-08-30", - published: true + published: true, + summary: "Custom post summary" } --- diff --git a/test/pardall_markdown/html_test.exs b/test/pardall_markdown/html_test.exs index 2b650c1..2666e02 100644 --- a/test/pardall_markdown/html_test.exs +++ b/test/pardall_markdown/html_test.exs @@ -2,6 +2,48 @@ defmodule PardallMarkdown.HtmlTest do use ExUnit.Case, async: true alias PardallMarkdown.Content.HtmlUtils + @moduletag :html_utils + doctest(PardallMarkdown.Content.HtmlUtils) + + @tag :post_summary + test "generate post summary" do + html = ~S""" +

Post Title

+ +
+
+
+

So, a description will be generated from it. Even a nested span.

+

Another paragraph?

+

Another paragraph 2?

+

Another paragraph 3?

+

As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph. As you can see, this a very long paragraph.

+
+
+
+ +

As you can see, this a paragraph outside.

+ + This is an anchor. + """ + + assert HtmlUtils.generate_summary_from_html(html) == "So, a description will be generated from it. Even a nested span. Another paragraph? Another paragraph 2? Another paragraph 3? As you can see, this a very long..." + + html = ~S""" +

Post Title

+ +

So, a description will be generated from it. Even a nested span.

+ +

As you can see, this a long paragraph outside.

This is an anchor. + """ + + assert HtmlUtils.generate_summary_from_html(html) == "So, a description will be generated from it. Even a nested span. As you can see, this a long paragraph outside." + + html = "

Do not delete Blender's Default Cube!

" + + assert HtmlUtils.generate_summary_from_html(html) == "Do not delete Blender's Default Cube!" + end + test "make internal links as live links" do html = ~S""" This is a link to an internal post. diff --git a/test/pardall_markdown/repository_test.exs b/test/pardall_markdown/repository_test.exs index bd79dcd..2f130e9 100644 --- a/test/pardall_markdown/repository_test.exs +++ b/test/pardall_markdown/repository_test.exs @@ -5,7 +5,18 @@ defmodule PardallMarkdown.RepositoryTest do setup do Application.ensure_all_started(:pardall_markdown) # wait the Markdown content to be parsed and built - Process.sleep(100) + Process.sleep(300) + end + + @tag :post_summary + test "custom post summary and generated post summary" do + # Custom + post = Repository.get_by_slug!("/blog/dailies/first-day") + assert post.summary == "Custom post summary" + + # Generated + post = Repository.get_by_slug!("/blog/dailies/3d/blender/default-cube-not-deleted") + assert post.summary == "Do not delete the Default Cube!" end # still not accounting for per-folder indexing