1
Fork 0
moonythm/src/metadata.rs

358 lines
8.4 KiB
Rust

use std::ffi::OsStr;
use std::fmt::Write;
use std::path::{Component, Path, PathBuf};
use std::process::Command;
use std::str::FromStr;
use anyhow::{anyhow, bail, Context};
use chrono::{DateTime, FixedOffset, Utc};
use jotdown::{Attributes, Container, Event};
use serde::Deserialize;
/// Reports whether last-modified timestamps should be recomputed instead of
/// being read back from the cache.
///
/// Returns `true` only when the `MOONYTHM_UPDATE_LAST_MODIFIED` environment
/// variable is set to exactly `"1"`. An unset variable, a non-unicode value,
/// or any other value yields `false`.
pub fn should_refresh_last_modified() -> bool {
    match std::env::var("MOONYTHM_UPDATE_LAST_MODIFIED") {
        Ok(flag) => flag == "1",
        Err(_) => false,
    }
}
// {{{ Config
/// Per-page configuration, deserialized from the TOML config blocks embedded
/// in a page's source.
///
/// Every field may be left out of the TOML: the `Option` fields then stay
/// `None`, and the boolean flags fall back to `false` via `#[serde(default)]`.
#[derive(Deserialize, Debug, Default)]
pub struct PageConfig {
/// Value of the `created_at` page attribute, if any.
/// NOTE(review): presumably the page's creation/publication date — confirm with the consumers of this field.
pub created_at: Option<DateTime<FixedOffset>>,
/// Value of the `sitemap_priority` page attribute, if any.
/// NOTE(review): presumably emitted as the sitemap `<priority>` entry — confirm.
pub sitemap_priority: Option<f32>,
/// Value of the `sitemap_changefreq` page attribute, if any.
/// NOTE(review): presumably emitted as the sitemap `<changefreq>` entry — confirm.
pub sitemap_changefreq: Option<String>,
/// Defaults to `false` when absent. When configs are merged, the flag is
/// set if any of the merged configs sets it.
#[serde(default)]
pub sitemap_exclude: bool,
/// Defaults to `false` when absent. When configs are merged, the flag is
/// set if any of the merged configs sets it.
#[serde(default)]
pub hidden: bool,
}
impl PageConfig {
// {{{ Merge a single property. Errors out on duplicate values.
fn merge_prop<A: PartialEq + std::fmt::Debug>(
label: &str,
first: Option<A>,
second: Option<A>,
) -> anyhow::Result<Option<A>> {
match first {
None => Ok(second),
Some(first) => {
if let Some(second) = second {
if second != first {
bail!(
"Conflicting values for `{label}` page attribute: {first:?} and {second:?}"
);
}
}
Ok(Some(first))
}
}
}
// }}}
// {{{ Config merging
/// Merge another config into the current one. Might error out on duplicate values.
fn merge(&mut self, other: PageConfig) -> anyhow::Result<()> {
self.created_at = Self::merge_prop("created_at", self.created_at, other.created_at)?;
self.sitemap_priority = Self::merge_prop(
"sitemap_priority",
self.sitemap_priority,
other.sitemap_priority,
)?;
self.sitemap_changefreq = Self::merge_prop(
"sitemap_changefreq",
self.sitemap_changefreq.take(),
other.sitemap_changefreq,
)?;
self.sitemap_exclude |= other.sitemap_exclude;
self.hidden |= other.hidden;
Ok(())
}
// }}}
}
// }}}
// {{{ Routing
/// Logical location of a page on the site.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum PageRoute {
/// The page built from `index.dj`; maps to the empty path.
Home,
/// The page built from `404.dj`; maps to `404`.
NotFound,
/// The index page of the `echoes` directory; maps to `echoes`.
Posts,
/// A single entry of the `echoes` directory, identified by its file name
/// without the `.dj` suffix; maps to `echoes/<id>`.
Post(String),
}
impl PageRoute {
// {{{ Convert a path to a route
fn from_path(path: &Path) -> anyhow::Result<Self> {
let Some(Component::Normal(first)) = path.components().nth(1) else {
bail!("Path is too short");
};
let result = if first == OsStr::new("index.dj") {
Self::Home
} else if first == OsStr::new("404.dj") {
Self::NotFound
} else if first == OsStr::new("echoes") {
let Some(Component::Normal(second)) = path.components().nth(2) else {
bail!("Cannot convert path '{:?}' to echo route", path);
};
let mut slice = second.to_str().unwrap();
if slice.ends_with(".dj") {
slice = slice.strip_suffix(".dj").unwrap();
}
if slice == "index" {
Self::Posts
} else {
Self::Post(slice.to_owned())
}
} else {
bail!("Cannot convert path '{:?}' to page route", path);
};
Ok(result)
}
// }}}
// {{{ Convert a route to a path
#[inline]
pub fn to_path(&self) -> PathBuf {
match self {
Self::Home => PathBuf::from_str("").unwrap(),
Self::NotFound => PathBuf::from_str("404").unwrap(),
Self::Posts => PathBuf::from_str("echoes").unwrap(),
Self::Post(id) => PathBuf::from_str(&format!("echoes/{id}")).unwrap(),
}
}
// }}}
}
// }}}
// {{{ Metadata
/// A heading found while scanning a page, together with the events that make
/// up its contents.
#[derive(Debug, Clone)]
pub struct Heading<'a> {
/// Heading level as reported by the parser, narrowed to `u8`.
#[allow(dead_code)]
pub level: u8,
pub id: String, // Heading events own their ID, so we have to clone
/// Events seen between the start and the end of the heading (both
/// exclusive), in document order.
pub events: Vec<jotdown::Event<'a>>,
}
/// Everything that is extracted from a page before it gets rendered.
#[derive(Debug)]
pub struct PageMetadata<'s> {
/// Result of merging every config block found in the page.
pub config: PageConfig,
/// Route derived from the path of the source file.
pub route: PageRoute,
/// Path of the source file, exactly as it was handed to `PageMetadata::new`.
pub source_path: PathBuf,
/// The first heading of the page (a clone of the first `toc` entry).
pub title: Heading<'s>,
/// Events collected from inside the page's description div(s).
pub description: Vec<jotdown::Event<'s>>,
/// Every heading of the page, in document order.
#[allow(dead_code)]
pub toc: Vec<Heading<'s>>,
/// The source text handed to `PageMetadata::new`, stored as-is.
pub source: &'s str,
/// Number of words counted in the page's text, config blocks excluded.
pub word_count: usize,
/// Last modification date: taken from `git log` when refreshing, otherwise
/// from the last-modified cache, falling back to the current time.
pub last_modified: DateTime<FixedOffset>,
}
impl<'a> PageMetadata<'a> {
pub fn new(
last_modified_cache: &mut LastModifiedCache,
path: PathBuf,
source: &'a str,
mut events: impl Iterator<Item = Event<'a>>,
) -> anyhow::Result<Self> {
let route = PageRoute::from_path(&path)?;
let last_modified = if should_refresh_last_modified() {
let last_modified_output = Command::new("git")
.arg("log")
.arg("-1")
.arg(r#"--pretty=format:%cI"#)
.arg(&path)
.output()
.with_context(|| anyhow!("Could not read the last modification date for file"))?
.stdout;
let last_modified = String::from_utf8(last_modified_output)?;
let last_modified = if last_modified.is_empty() {
Utc::now().fixed_offset()
} else {
DateTime::parse_from_rfc3339(&last_modified).with_context(|| {
anyhow!(
"Failed to parse datetime returned by git '{}'",
last_modified
)
})?
};
last_modified_cache
.pages
.push((route.clone(), last_modified));
last_modified
} else {
last_modified_cache
.pages
.iter()
.find(|item| item.0 == route)
.map(|(_, last_modified)| *last_modified)
.unwrap_or_else(|| Utc::now().fixed_offset())
};
let mut w = Writer::new();
events.try_for_each(|e| w.render_event(&e))?;
let title = w
.toc
.first()
.ok_or_else(|| anyhow!("No heading found to infer title from"))?;
Ok(Self {
route,
title: title.clone(),
last_modified,
source_path: path,
source,
config: w.config,
description: w.description,
toc: w.toc,
word_count: w.word_count,
})
}
}
// }}}
// {{{ Metadata parsing
/// Position of the metadata scanner within the document.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum State {
/// Not inside any of the constructs tracked below.
Toplevel,
/// Between the start and the end of a heading.
Heading,
/// Inside a `toml` raw block carrying the `config` role.
Config,
/// Inside a div carrying the `description` role.
Description,
}
/// Accumulator that is fed the events of a page one by one and collects the
/// page's metadata along the way.
struct Writer<'s> {
/// Merge of every config block parsed so far.
config: PageConfig,
/// Headings encountered so far, in document order.
toc: Vec<Heading<'s>>,
/// Text of the config block currently being read; cleared once the block
/// has been parsed.
toml_text: String,
/// Which construct the scanner is currently inside of.
state: State,
/// Running count of words seen outside of config blocks.
word_count: usize,
/// Events collected while inside a description div.
description: Vec<jotdown::Event<'s>>,
}
impl<'s> Writer<'s> {
fn new() -> Self {
Self {
config: PageConfig::default(),
description: Vec::new(),
toc: Vec::new(),
toml_text: String::new(),
state: State::Toplevel,
word_count: 0,
}
}
fn render_event<'a>(&mut self, e: &'a jotdown::Event<'s>) -> anyhow::Result<()> {
if let Event::Str(content) = e {
if self.state != State::Config {
self.word_count += content
.split(" ")
.filter(|w| w.contains(|c: char| c.is_alphabetic()))
.count()
}
}
match e {
// {{{ Headings
Event::Start(Container::Heading { level, id, .. }, _) => {
assert_eq!(self.state, State::Toplevel);
self.state = State::Heading;
self.toc.push(Heading {
level: *level as u8,
events: Vec::new(),
// These ids are always borrowed, unless modified by the user (i.e. me)
id: id.to_string(),
})
}
Event::End(Container::Heading { .. }) => {
assert_eq!(self.state, State::Heading);
self.state = State::Toplevel;
}
// }}}
// {{{ TOML config blocks
Event::Start(Container::RawBlock { format: "toml" }, attrs) => {
assert_eq!(self.state, State::Toplevel);
if has_role(attrs, "config") {
self.state = State::Config
}
}
Event::End(Container::RawBlock { format: "toml" }) => {
if self.state == State::Config {
self.state = State::Toplevel;
let config: PageConfig = toml::from_str(&self.toml_text)
.with_context(|| "Failed to parse page config in TOML format")?;
self.config.merge(config)?;
self.toml_text.clear();
}
}
// }}}
// {{{ Descriptions
Event::Start(Container::Div { .. }, attrs) if self.state == State::Toplevel => {
if has_role(attrs, "description") {
self.state = State::Description
}
}
Event::End(Container::Div { .. }) if self.state == State::Description => {
self.state = State::Toplevel;
}
// }}}
Event::Str(str) if self.state == State::Config => {
self.toml_text.write_str(str)?;
}
_ if self.state == State::Description => {
self.description.push(e.clone());
}
_ if self.state == State::Heading => {
let last_heading = self.toc.last_mut().unwrap();
last_heading.events.push(e.clone());
}
_ => {}
}
Ok(())
}
}
// }}}
// {{{ Helpers
/// Tells whether `attrs` carries a `role` attribute whose rendered value is
/// exactly `value`. A missing `role` attribute yields `false`.
pub fn has_role(attrs: &Attributes, value: &str) -> bool {
    match attrs.get_value("role") {
        Some(role) => role.to_string() == value,
        None => false,
    }
}
// }}}
// {{{ Last modified cache
/// Last-modification dates of the site's pages, persisted as TOML in
/// `last_modified.toml`.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct LastModifiedCache {
/// One `(route, date)` pair per page. Lookups scan the list linearly and
/// stop at the first entry whose route matches.
pages: Vec<(PageRoute, DateTime<FixedOffset>)>,
}
impl LastModifiedCache {
pub fn from_file() -> anyhow::Result<LastModifiedCache> {
if should_refresh_last_modified() {
Ok(Self::default())
} else {
Ok(toml::de::from_str(&std::fs::read_to_string(
"last_modified.toml",
)?)?)
}
}
pub fn save(&self) -> anyhow::Result<()> {
Ok(std::fs::write(
"last_modified.toml",
toml::ser::to_string(self)?,
)?)
}
}
// }}}