From 078842fbc5f7fdfddb381e0614547d122aea08d6 Mon Sep 17 00:00:00 2001
From: Miraty
Date: Mon, 15 Jan 2024 19:55:39 +0100
Subject: [PATCH] add support for po4a translations

---
Review notes (this area is ignored when the patch is applied):

* po4a now runs only when SITE/po4a.cfg exists, because README.md documents
  that file as optional. The path is passed through escapeshellarg() and a
  non-zero exit status aborts the build, matching the fail-fast style of the
  "command -v" check.
* The source -> cache path is built by swapping only the leading SITE prefix.
  str_replace() would also rewrite any later occurrence of SITE in the path.
* A failed copy() into the cache aborts instead of being silently ignored.
* NOTE(review): line breaks, blank context lines and indentation (tabs
  assumed) were reconstructed from a whitespace-collapsed copy of this patch.
  Confirm against the target tree, or apply with
  `git apply --ignore-whitespace`.
* NOTE(review): the commit id on the first line and the `index` blob ids
  belong to the original commit and do not describe the amended mkht.php.
* NOTE(review): the context lines "added just before ``" (README.md) and
  "str_replace(' ', ' ', $content)" (mkht.php) look truncated by extraction.
  They are kept exactly as found; verify them against the target files.

 README.md | 12 ++++++++----
 mkht.php  | 42 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 2586f24..fbe2221 100755
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # mkht.php
 
-mkht.php is a PHP script for building HTML/CSS sites from source documents in PHP, Gemini, Pandoc Markdown, HTML and CSS.
+mkht.php is a PHP script for building HTML/CSS sites from source documents in PHP, Gemtext, Pandoc Markdown, Gettext translation files, HTML and CSS.
 
 For my personal use cases, this project include some specific tweaks that may not be fully or correctly documented.
 
@@ -19,9 +19,9 @@ If `site_path` is not set, it will default to current directory.
 
 ## Input
 
-Source pages must end in `.md` and can use Markdown, HTML and PHP.
+Source pages must end in `.md` and can use Gemtext, Markdown, HTML and PHP.
 
-The following files have special meaning:
+The following optional files have special meaning:
 
 `/config.ini`
 : some default settings can be changed by this file
@@ -38,6 +38,9 @@ The following files have special meaning:
 `/end.inc.html`
 : added just before ``
 
+`/po4a.cfg`
+: [po4a](https://po4a.org/) configuration file
+
 Files starting with a dot (except for `.htaccess` and `.well-known`) are ignored.
 
 Files containing `draft` in their name (separated from other characters by `.`) are ignored.
@@ -66,7 +69,8 @@ IDs are attributed to titles according to their content, therefore modifying a t
 
 * PHP
 * gzip
-* pandoc
+* pandoc for Markdown → HTML
+* [po4a](https://po4a.org/) for Gettext translation
 
 ## License
 
diff --git a/mkht.php b/mkht.php
index 696ffe5..77f94ef 100755
--- a/mkht.php
+++ b/mkht.php
@@ -10,7 +10,7 @@ define('ROOT', dirname($_SERVER['SCRIPT_FILENAME']));
 if (!extension_loaded('tidy'))
 	echo 'PHP tidy extension unavailable. Feature disabled.' . PHP_EOL;
 
-foreach (['pandoc', 'gzip'] as $command) {
+foreach (['pandoc', 'gzip', 'po4a'] as $command) {
 	exec('command -v ' . $command, result_code: $code);
 	if ($code !== 0)
 		exit($command . ' command not available.' . PHP_EOL);
@@ -53,14 +53,43 @@ function clearnetOrOnion($clearnet_url, $onion_url) {
 $dirs_last_update = [];
 $pages_langs = [];
 
-$nodes = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE, RecursiveDirectoryIterator::SKIP_DOTS));
+$nodes_src = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE, RecursiveDirectoryIterator::SKIP_DOTS));
 
-foreach($nodes as $node) {
+foreach($nodes_src as $node) {
+	$node_info = new SplFileInfo($node->getPathName());
+	$src = $node_info->getPathname();
+	if (str_starts_with($src, SITE . 'target/') OR str_starts_with($src, SITE . 'cache/'))
+		continue;
+	$target = SITE . 'cache/' . substr($src, strlen(SITE)); // Swap only the leading SITE prefix; str_replace() would also rewrite later occurrences
+
+	$path_parts_src = pathinfo($src);
+	$path_parts_target = pathinfo($target);
+	if (preg_match('#/\.(?!htaccess|well-known)#', $src) !== 0) // Skip hidden nodes other than .htaccess and .well-known
+		continue;
+	if ($node_info->getType() !== 'file')
+		continue;
+	if (in_array('draft', explode('.', $path_parts_target['basename']), true))
+		continue;
+	if (!file_exists($path_parts_target['dirname'])) // Create parent directory if needed
+		mkdir($path_parts_target['dirname'], 0755, true);
+	if (!copy($src, $target)) // Stop rather than build from an incomplete cache
+		exit('Unable to copy ' . $src . ' to ' . $target . PHP_EOL);
+}
+
+if (is_file(SITE . 'po4a.cfg')) { // po4a.cfg is optional: only translate when the site provides one
+	exec('po4a ' . escapeshellarg(SITE . 'po4a.cfg'), result_code: $code);
+	if ($code !== 0)
+		exit('po4a failed (exit code ' . $code . ').' . PHP_EOL);
+}
+
+$nodes_cache = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE . 'cache/', RecursiveDirectoryIterator::SKIP_DOTS));
+
+foreach($nodes_cache as $node) {
 	$node_info = new SplFileInfo($node->getPathName());
 	$src = $node_info->getPathname();
 	if (str_starts_with($src, SITE . 'target/'))
 		continue;
-	$target = str_replace(SITE, SITE . 'target/', $src);
+	$target = str_replace(SITE . 'cache/', SITE . 'target/', $src);
 
 	$path_parts_src = pathinfo($src);
 	$path_parts_target = pathinfo($target);
@@ -92,7 +121,7 @@ foreach($nodes as $node) {
 foreach ($pages as $node) {
 	$node_info = new SplFileInfo($node->getPathName());
 	$src = $node_info->getPathname();
-	$target = str_replace(SITE, SITE . 'target/', $src);
+	$target = str_replace(SITE . 'cache/', SITE . 'target/', $src);
 
 	$path_parts_src = pathinfo($src);
 	$path_parts_target = pathinfo($target);
@@ -230,6 +259,7 @@ foreach ($pages as $node) {
 			'indent-spaces' => 4,
 			'output-xhtml' => true,
 			'wrap' => 0,
+			'hide-comments' => true,
 		]);
 		$content = str_replace(' ', ' ', $content);
 	}
@@ -243,7 +273,7 @@ foreach ($pages as $node) {
 	if ($page_names[$src] === 'index')
 		continue;
 
-	$relative_addr = substr_replace($base_filepath_src . '.html', '', strpos($base_filepath_src, SITE), strlen(SITE));
+	$relative_addr = str_replace(SITE . 'cache/', '', $base_filepath_src . '.html');
 
 	// As of RFC 3151: A URN Namespace for Public Identifiers
 	$public_id = 'urn:publicid:' . $config['id'] . str_replace('/', '%2F', $relative_addr);