mkht.php/mkht.php

265 lines
8.1 KiB
PHP
Raw Normal View History

2021-10-09 20:13:55 +02:00
#!/usr/bin/php
<?php
2023-05-23 16:49:39 +02:00
// Refuse to run under a web SAPI: the script relies on $argv and on the local filesystem.
// STDERR only exists under the CLI SAPI, so this one message has to go through echo.
if (php_sapi_name() !== 'cli') {
    echo 'Must be run from CLI.' . PHP_EOL;
    exit(1);
}

const LF = "\n";

// Directory of the invoked script; the bundled style.css is copied from here.
define('ROOT', dirname($_SERVER['SCRIPT_FILENAME']));

// tidy is optional: without it the generated HTML is written without being reformatted.
if (!extension_loaded('tidy'))
    echo 'PHP tidy extension unavailable. Feature disabled.' . PHP_EOL;

// pandoc and gzip are mandatory. Exit with a non-zero status (exit('text') would
// report success) so that callers and build scripts can detect the failure.
foreach (['pandoc', 'gzip'] as $command) {
    exec('command -v ' . $command, result_code: $code);
    if ($code !== 0) {
        fwrite(STDERR, $command . ' command not available.' . PHP_EOL);
        exit(1);
    }
}
2021-10-09 20:13:55 +02:00
// Separate the "-f" flag (rebuild every page) from the positional arguments.
$opt = ['force' => false];
$args = [];
foreach ($argv as $cli_argument) {
    if ($cli_argument === '-f') {
        $opt['force'] = true;
        continue;
    }
    $args[] = $cli_argument;
}

// $args[0] is the script name; the site directory and the destination follow it.
define('SITE', $args[1] ?? getcwd());
define('DESTINATION', $args[2] ?? 'dns');
2021-10-09 20:13:55 +02:00
// Load the optional per-site configuration.
$config = [];
if (file_exists(SITE . '/config.ini')) {
    $config = parse_ini_file(SITE . '/config.ini');
    // parse_ini_file() returns false on a syntax error; stop instead of going on with a non-array.
    if ($config === false) {
        fwrite(STDERR, 'Can\'t parse ' . SITE . '/config.ini.' . PHP_EOL);
        exit(1);
    }
}

// Defaults for every option the rest of the script reads without checking for presence.
$config['title'] ??= '';
$config['header'] ??= false;
$config['author'] ??= NULL;
$config['base-url'] ??= [];
$config['center-index'] ??= false;
$config['default-lang'] ??= NULL;
$config['announce-css'] ??= false;
$config['announce-feed'] ??= false;

// base-url is iterated as a list later on; accept a single "base-url = …" as well as "base-url[] = …".
if (!is_array($config['base-url']))
    $config['base-url'] = ($config['base-url'] === '') ? [] : [$config['base-url']];

// The feed identifier must stay stable between runs: generate it once and persist it.
if (!isset($config['id'])) {
    $config['id'] = bin2hex(random_bytes(32));
    // Start on a fresh line when the existing file does not end with a newline,
    // otherwise the new key would be glued to the last line of config.ini.
    $existing_ini = file_exists(SITE . '/config.ini') ? file_get_contents(SITE . '/config.ini') : '';
    $separator = ($existing_ini === false || $existing_ini === '' || str_ends_with($existing_ini, LF)) ? '' : LF;
    file_put_contents(SITE . '/config.ini', $separator . 'id = "' . $config['id'] . '"' . LF, FILE_APPEND);
}

// Publish the bundled stylesheet under the name the generated pages link to.
if ($config['announce-css'])
    copy(ROOT . '/style.css', SITE . '/mkht-php.css');
2021-10-09 20:13:55 +02:00
/**
 * Determine whether links need to use Onion or DNS.
 *
 * @param mixed $clearnet_url Returned for every destination other than 'onion'.
 * @param mixed $onion_url    Returned when DESTINATION is exactly 'onion'.
 * @return mixed The selected argument, unchanged.
 */
function clearnetOrOnion($clearnet_url, $onion_url) {
    if (DESTINATION === 'onion')
        return $onion_url;
    return $clearnet_url;
}
// Collect every page source below SITE/src together with its modification time.
if (!is_dir(SITE . '/src')) {
    // RecursiveDirectoryIterator would otherwise throw an uncaught exception.
    fwrite(STDERR, SITE . '/src is not a directory.' . PHP_EOL);
    exit(1);
}
// Initialised up front so that a site without any page still yields an (empty) array.
$files_dates = [];
$files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator(SITE . '/src', RecursiveDirectoryIterator::SKIP_DOTS));
foreach ($files as $file) {
    // Only regular files with a known source extension are pages.
    if ($file->getType() !== 'file' || !in_array($file->getExtension(), ['gmi', 'md', 'html'], true))
        continue;
    // Skip hidden files. The file name is tested rather than the full path: a path
    // always starts with SITE, so a relative SITE such as "./site" used to exclude
    // every page while an absolute one never excluded anything.
    if (str_starts_with($file->getFilename(), '.'))
        continue;
    $files_dates[$file->getPathname()] = $file->getMTime();
}
// Oldest first, so the most recently modified page ends up last.
asort($files_dates);
// Start the Atom feed document; one <entry> per page is appended to $feed later.
// The header is built as a string: a literal "<?xml" outside PHP tags is parsed as
// a PHP open tag (and fails) when the short_open_tag ini setting is enabled.

// Escape text for XML. Existing entities are left alone (double_encode = false),
// so values that were already escaped by hand come out unchanged.
$xml_escape = static fn ($text): string => htmlspecialchars((string) $text, ENT_XML1 | ENT_QUOTES, 'UTF-8', false);

// Date of the most recently modified page, or now when the site has no page yet.
$feed_updated = empty($files_dates) ? time() : max($files_dates);

$feed = '<?xml version="1.0" encoding="UTF-8"?>' . LF;
$feed .= '<feed xmlns="http://www.w3.org/2005/Atom">' . LF;
$feed .= '<title>' . $xml_escape($config['title']) . '</title>' . LF;
$feed .= '<id>urn:publicid:' . $config['id'] . '</id>' . LF;
foreach ($config['base-url'] as $url)
    $feed .= ' <link rel="self" type="application/atom+xml" href="' . $xml_escape($url) . '/feed.atom"></link>' . LF;
$feed .= '<updated>' . date('c', $feed_updated) . '</updated>' . LF;
$feed .= '<author>' . LF;
$feed .= '<name>' . $xml_escape($config['author'] ?? '') . '</name>' . LF;
$feed .= '</author>' . LF;
foreach ($files_dates as $src_page => $last_mod) {
    // Mirror SITE/src/<path> to SITE/<path>. Only the leading "src" directory is
    // removed, so a "/src/" segment anywhere else in the path (inside SITE itself
    // or in a subdirectory of the sources) is preserved.
    $dest_page = SITE . '/' . substr($src_page, strlen(SITE . '/src/'));

    $content = file_get_contents($src_page);

    // Page title: the text of the first line starting with "#", NULL when there is none.
    preg_match('/^# ?(?<title>.*)$/Dm', $content, $matches);
    $title = $matches['title'] ?? NULL;

    // Destination path without its extension; the generated files are named after it.
    $path_parts = pathinfo($dest_page);
    $base_filepath = $path_parts['dirname'] . '/' . $path_parts['filename'];
2022-06-01 17:33:32 +02:00
// Rebuild when the output is missing, older than its source, or when -f was given.
$needs_build = !file_exists($dest_page)
    || filemtime($src_page) > filemtime($dest_page)
    || $opt['force'];
if ($needs_build) {
    echo 'Compiling ' . $src_page . ' ' . date("Y-m-d H:i:s", $last_mod) . LF;

    // Create parent directory if needed
    $dest_dir = $path_parts['dirname'];
    if (!file_exists($dest_dir))
        mkdir($dest_dir, 0755, true);

    // Execute PHP code embedded in the page and keep what it prints.
    // NOTE(review): eval() runs the page source with the full privileges of this
    // script; only build sites whose sources are trusted.
    ob_start();
    eval('?>' . $content);
    $content = ob_get_clean();
    file_put_contents($base_filepath . '.' . $path_parts['extension'], $content);
2022-06-01 17:33:32 +02:00
// Convert Gemtext to Markdown
if ($path_parts['extension'] === 'gmi') {
    // A link line is "=>", optional blanks, an address, then optionally blanks and
    // a label running to the end of the line. The address uses \S rather than \H:
    // \H also matches a newline, which let one match swallow two consecutive link
    // lines. The label may contain spaces; trailing blanks are not part of it.
    $content = preg_replace_callback(
        '/^=>\h*(?<addr>\S+)(?:\h+(?<title>\S.*?))?\h*$/m',
        static function (array $link): string {
            $addr = $link['addr'];
            // Local links to other Gemtext pages must target the Markdown twin
            // (a later step turns local .md links into .html).
            if (!str_contains($addr, ':') AND str_ends_with($addr, '.gmi'))
                $addr = substr($addr, 0, -3) . 'md';
            // Without a label the (rewritten) address is shown.
            return '[' . ($link['title'] ?? $addr) . '](' . $addr . ')';
        },
        $content,
    );
    file_put_contents($base_filepath . '.md', $content);
}
// Compile Markdown to HTML
// The Markdown is written to pandoc's stdin and the HTML fragment is read back
// from its stdout; no pipe is set up for stderr.
$process = proc_open('pandoc --fail-if-warnings -f markdown_phpextra-citations-native_divs-native_spans+abbreviations+hard_line_breaks+lists_without_preceding_blankline -t html --wrap none', [
    0 => ['pipe', 'r'],
    1 => ['pipe', 'w'],
], $pipes);
if (is_resource($process) !== true) {
    // Non-zero status so that the failed build is not mistaken for a success.
    fwrite(STDERR, 'Can\'t spawn pandoc.' . PHP_EOL);
    exit(1);
}
fwrite($pipes[0], $content);
// Closing stdin signals end of input to pandoc.
fclose($pipes[0]);
$content = stream_get_contents($pipes[1]);
fclose($pipes[1]);
// pandoc is run with --fail-if-warnings, so a non-zero status covers warnings too.
if (proc_close($process) !== 0) {
    fwrite(STDERR, 'pandoc failed.' . PHP_EOL);
    exit(1);
}
2022-06-01 17:33:32 +02:00
// .md > .html for local links (no scheme, i.e. no ":"), keeping an optional #fragment
$content = preg_replace('/ href="([^:"#]+)\.md(#[^"]*)?"/', ' href="$1.html$2"', $content);

// One "../" per directory level between this page and the site root. Only the
// leading SITE prefix is stripped, so a directory whose path happens to contain
// SITE again is still counted correctly.
$relative_root_path = str_repeat('../', substr_count(substr($path_parts['dirname'], strlen(SITE)), '/'));
2022-06-01 17:33:32 +02:00
// Wrap the converted fragment in a complete HTML document, captured through output buffering.
ob_start();
?>
<!DOCTYPE html>
<html lang="<?php
    // Page language: a ".xx." component in the file name wins, then a two-letter
    // directory in the path, then the configured default (possibly none).
    preg_match('#\.([a-zA-Z-]{2,5})\.#', $path_parts['basename'], $file_lang);
    if (isset($file_lang[1])) {
        $lang = $file_lang[1];
    } else {
        preg_match('#/([a-z]{2})(/|$)#', $path_parts['dirname'], $dir_lang);
        $lang = $dir_lang[1] ?? $config['default-lang'];
    }
    echo $lang ?? '';
?>">
<head>
<meta charset="utf-8">
<?php
    // "<page title> · <site title>", using whichever parts are non-empty.
    // $config['title'] defaults to '', so isset() cannot tell whether a site title exists.
    $title_parts = array_filter([$title, $config['title']], static fn ($part): bool => (string) $part !== '');
    if ($title_parts !== [])
        echo '<title>' . implode(' · ', $title_parts) . '</title>';
?>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="referrer" content="no-referrer">
<?php
    if (isset($config['author']))
        echo '<meta name="author" content="' . $config['author'] . '">';
    if ($config['announce-feed'])
        echo '<link rel="alternate" type="application/atom+xml" href="' . $relative_root_path . 'feed.atom">' . LF;
    if ($config['announce-css']) {
        // The site's own stylesheet is linked only when it exists; the bundled one always is.
        if (file_exists(SITE . '/style.css'))
            echo '<link rel="stylesheet" media="screen" href="' . $relative_root_path . 'style.css">' . LF;
        echo '<link rel="stylesheet" media="screen" href="' . $relative_root_path . 'mkht-php.css">' . LF;
    }
    // Optional site-wide additions to <head>, inserted verbatim.
    if (file_exists(SITE . '/head.inc.html'))
        echo file_get_contents(SITE . '/head.inc.html');
?>
</head>
<body>
<?php
    if ($config['header']) {
?>
<header>
<a href="./<?= $relative_root_path ?>">
<?php
        if (file_exists(SITE . '/img/logo.webp'))
            // The logo lives at the site root, so it is addressed through the relative
            // root like the stylesheets; index 3 of getimagesize() is the ready-made
            // width/height attribute string (empty if the image cannot be read).
            echo '<img src="' . $relative_root_path . 'img/logo.webp" ' . (getimagesize(SITE . '/img/logo.webp')[3] ?? '') . ' alt="' . $config['title'] . '" />';
        else
            // 'site-title' is never given a default; fall back to the regular title.
            echo $config['site-title'] ?? $config['title'];
?>
</a>
</header>
<?php
    }
    if ($config['center-index'] AND $path_parts['filename'] === 'index')
        echo '<div class="centered">' . $content . '</div>';
    else
        echo '<main>' . $content . '</main>';
    // Optional site-wide footer; included with require, so PHP code in it is executed.
    if (file_exists(SITE . '/end.inc.html'))
        require SITE . '/end.inc.html';
    echo '</body></html>';

    $content = ob_get_clean();
2022-06-01 19:36:07 +02:00
// Reformat the page with tidy when the extension is available.
if (extension_loaded('tidy')) {
    $tidied = tidy_repair_string($content, [
        'indent' => true,
        'indent-spaces' => 4,
        'output-xhtml' => true,
        'wrap' => 0,
    ]);
    if ($tidied !== false) {
        // NOTE(review): as written, search and replacement are both a single space,
        // which leaves the string unchanged; whitespace inside these two literals
        // may have been lost, confirm against the intended values.
        $content = str_replace(' ', ' ', $tidied);
    } else {
        // tidy_repair_string() returns false on failure; keep the unformatted page
        // rather than overwriting it with an empty one.
        fwrite(STDERR, 'tidy failed on ' . $src_page . ', keeping unformatted HTML.' . PHP_EOL);
    }
}
2023-05-31 03:11:36 +02:00
file_put_contents($base_filepath . '.html', $content);
// Gzip compression
// The path is quoted for the shell so that spaces or metacharacters in it can
// neither break nor alter the command; "--" keeps a leading "-" from being read as an option.
exec('gzip --keep --fast --force -- ' . escapeshellarg($base_filepath . '.html'));
2022-06-01 19:36:07 +02:00
}
// Address of the generated page relative to the site root (SITE prefix removed).
$relative_addr = substr($base_filepath . '.html', strlen(SITE));
// As of RFC 3151: A URN Namespace for Public Identifiers
$public_id = 'urn:publicid:' . $config['id'] . str_replace('/', '%2F', $relative_addr);

// The feed entry embeds whatever the generated page has between <body> and </body>;
// an empty entry is used when no such pair is found.
preg_match('#\<body\>(?<content>.*)\</body\>#s', file_get_contents($base_filepath . '.html'), $match);
$atom_entry_content = $match['content'] ?? '';

// Make relative links absolute: first base URL + directory of the page + link target
$page_base_url = ($config['base-url'][0] ?? '') . substr($path_parts['dirname'], strlen(SITE)) . '/';
$atom_entry_content = preg_replace_callback(
    '# href=\"(?<relative_url>[^:"]+)\"#',
    static fn (array $link): string => ' href="' . $page_base_url . $link['relative_url'] . '"',
    $atom_entry_content,
);
// Pages with a "draft" component in their file name are left out of the feed.
if (!in_array('draft', explode('.', $path_parts['basename']), true)) {
    // The title comes straight from the page source and the URLs from config.ini:
    // escape both for XML so that a "&" or "<" cannot break the feed. Existing
    // entities are left alone (double_encode = false).
    $xml_escape = static fn ($text): string => htmlspecialchars((string) $text, ENT_XML1 | ENT_QUOTES, 'UTF-8', false);

    $feed .= '<entry>' . LF;
    $feed .= '<title>' . $xml_escape($title) . '</title>' . LF;
    $feed .= '<id>' . $public_id . '</id>' . LF;
    $feed .= '<updated>' . date('c', $last_mod) . '</updated>' . LF;
    foreach ($config['base-url'] as $base_url)
        $feed .= ' <link rel="alternate" type="text/html" href="' . $xml_escape($base_url . $relative_addr) . '"></link>' . LF;
    // type="html": the markup is carried as escaped text.
    $feed .= '<content type="html">' . htmlspecialchars($atom_entry_content) . '</content>' . LF;
    $feed .= '</entry>' . LF;
}
2021-10-09 20:13:55 +02:00
}
// Close the feed document and write it at the site root.
$feed_document = $feed . '</feed>' . LF;
file_put_contents(SITE . '/feed.atom', $feed_document);