This commit is contained in:
2026-03-29 14:01:52 +03:00
commit 0611279128
210 changed files with 60454 additions and 0 deletions

883
Doc/ttman.c Normal file
View File

@@ -0,0 +1,883 @@
/*
* ttman - text to man converter
*
* Copyright 2006 Timo Hirvonen <tihirvon@gmail.com>
*
* This file is licensed under the GPLv2.
*/
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
/*
 * One lexical token of the input document.
 *
 * Tokens are kept in a doubly linked list whose sentinel node is the
 * file-scope variable `head`.
 */
struct token {
// neighbours in the token list
struct token *next;
struct token *prev;
// Token kind.  The enumerator order matters: values index token_names[],
// everything from TOK_H1 onwards is treated as an @keyword, and the
// "end" keywords must directly follow their opening keyword.
enum {
TOK_TEXT, // max one line w/o \n
TOK_NL, // \n
TOK_ITALIC, // `
TOK_BOLD, // *
TOK_INDENT, // \t
// keywords (@...)
TOK_H1,
TOK_H2,
TOK_LI,
TOK_BR,
TOK_PRE,
TOK_ENDPRE, // must be after TOK_PRE
TOK_RAW,
TOK_ENDRAW, // must be after TOK_RAW
TOK_TITLE, // WRITE 2 2001-12-13 "Linux 2.0.32" "Linux Programmer's Manual"
} type;
// input line number the token was created on (taken from cur_line)
int line;
// not NUL-terminated
const char *text;
// length of text
int len;
};
// program name (argv[0]); prefix of messages printed by die()
static const char *program;
// input file name (argv[1]); prefix of messages printed by syntax()
static const char *filename;
// name of the temporary output file; unlinked by quit() when non-empty
static char tmp_file[1024];
// stream that all generated output is written to
static FILE *outfile;
// line number assigned to newly created tokens; advanced by tokenize()
static int cur_line = 1;
// sentinel of the token list; an empty list points back to itself
static struct token head = { &head, &head, TOK_TEXT, 0, NULL, 0 };
// initializer for a { string, length-without-NUL } pair
#define CONST_STR(str) { str, sizeof(str) - 1 }
/*
 * Names of the token types, indexed by the token type value, so the
 * order here must match the order of the enum in struct token.
 * The entries from "h1" onwards are the names accepted after '@'.
 */
static const struct {
const char *str;
int len;
} token_names[] = {
CONST_STR("text"),
CONST_STR("nl"),
CONST_STR("italic"),
CONST_STR("bold"),
CONST_STR("indent"),
// keywords
CONST_STR("h1"),
CONST_STR("h2"),
CONST_STR("li"),
CONST_STR("br"),
CONST_STR("pre"),
CONST_STR("endpre"),
CONST_STR("raw"),
CONST_STR("endraw"),
CONST_STR("title")
};
// number of entries in token_names[]
#define NR_TOKEN_NAMES (sizeof(token_names) / sizeof(token_names[0]))
// report an internal inconsistency in the current function and terminate
#define BUG() die("BUG in %s\n", __FUNCTION__)
// marks functions that never return; expands to nothing on non-GNU compilers
#ifdef __GNUC__
#define CMUS_NORETURN __attribute__((__noreturn__))
#else
#define CMUS_NORETURN
#endif
/*
 * Terminate with exit status 1, first removing the temporary output
 * file if its name has already been filled in.
 */
static CMUS_NORETURN void quit(void)
{
	if (tmp_file[0] != '\0') {
		unlink(tmp_file);
	}
	exit(1);
}
/*
 * Print "<program>: <formatted message>" to stderr and terminate via quit().
 *
 * format and the following arguments are printf-style.
 */
static CMUS_NORETURN void die(const char *format, ...)
{
	va_list args;

	fprintf(stderr, "%s: ", program);

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	quit();
}
/*
 * Report a syntax error in the input file as
 * "<filename>:<line>: error: <formatted message>" on stderr and
 * terminate via quit().
 *
 * line is the input line number; format and the following arguments
 * are printf-style.
 */
static CMUS_NORETURN void syntax(int line, const char *format, ...)
{
	va_list args;

	fprintf(stderr, "%s:%d: error: ", filename, line);

	va_start(args, format);
	vfprintf(stderr, format, args);
	va_end(args);

	quit();
}
/*
 * Return the name (without '@') of a keyword token type.
 *
 * Dies with a BUG message when type is not in the keyword range
 * TOK_H1..TOK_TITLE.
 */
static inline const char *keyword_name(int type)
{
	int is_keyword = (type >= TOK_H1 && type <= TOK_TITLE);

	if (!is_keyword) {
		die("BUG: no keyword name for type %d\n", type);
	}
	return token_names[type].str;
}
/*
 * malloc() wrapper that never returns NULL: on allocation failure the
 * program is terminated through die().
 */
static void *xmalloc(size_t size)
{
	void *ret = malloc(size);

	if (!ret) {
		// %zu is the conversion for size_t ("%ul" was "%u" followed by a literal 'l')
		die("OOM when allocating %zu bytes\n", size);
	}
	return ret;
}
/*
 * Return a freshly allocated, NUL-terminated copy of the first len
 * bytes of str.  The caller owns the returned buffer.
 */
static char *memdup(const char *str, int len)
{
	char *copy = xmalloc(len + 1);

	memcpy(copy, str, len);
	copy[len] = '\0';
	return copy;
}
/*
 * Allocate a token of the given type, stamped with the current input
 * line.  The token is not linked into the list yet and carries no text;
 * callers that need text set `text` and `len` afterwards.
 */
static struct token *new_token(int type)
{
	struct token *tok = xmalloc(sizeof(*tok));

	tok->prev = NULL;
	tok->next = NULL;
	tok->type = type;
	tok->line = cur_line;
	// never hand out indeterminate fields: not every caller sets these
	tok->text = NULL;
	tok->len = 0;
	return tok;
}
/*
 * Unlink tok from the token list and release it.
 * The list sentinel must never be passed in (that is reported as a BUG).
 */
static void free_token(struct token *tok)
{
	if (tok == &head)
		BUG();

	tok->prev->next = tok->next;
	tok->next->prev = tok->prev;
	free(tok);
}
/*
 * Append tok to the end of the token list (just before the sentinel).
 */
static void emit_token(struct token *tok)
{
	struct token *last = head.prev;

	tok->next = &head;
	tok->prev = last;
	last->next = tok;
	head.prev = tok;
}
/*
 * Create a token of the given type without any text and append it to
 * the token list.
 */
static void emit(int type)
{
	struct token *tok = new_token(type);

	tok->text = NULL;
	tok->len = 0;
	emit_token(tok);
}
/*
 * Parse the keyword that follows an '@' and append the matching
 * keyword token to the list.
 *
 * buf points just past the '@', size is the number of bytes available.
 * Returns the length of the keyword name.  A missing or unknown keyword
 * is a syntax error (does not return).
 */
static int emit_keyword(const char *buf, int size)
{
	int len = 0;
	int type;

	// the keyword name is the leading run of alphanumeric characters
	while (len < size && isalnum((unsigned char)buf[len]))
		len++;
	if (len == 0)
		syntax(cur_line, "keyword expected\n");

	// only the keyword entries (TOK_H1 and later) are candidates
	for (type = TOK_H1; type < NR_TOKEN_NAMES; type++) {
		if (token_names[type].len == len &&
		    strncmp(buf, token_names[type].str, len) == 0) {
			emit(type);
			return len;
		}
	}
	syntax(cur_line, "invalid keyword '@%s'\n", memdup(buf, len));
}
/*
 * Append a TOK_TEXT token covering the longest prefix of buf that
 * contains none of the special characters @ ` * \n \ \t.
 *
 * The token text points into buf (no copy is made).
 * Returns the number of bytes consumed.
 */
static int emit_text(const char *buf, int size)
{
	struct token *tok;
	int len = 0;
	int special = 0;

	while (len < size && !special) {
		switch (buf[len]) {
		case '@':
		case '`':
		case '*':
		case '\n':
		case '\\':
		case '\t':
			special = 1;
			break;
		default:
			len++;
			break;
		}
	}

	tok = new_token(TOK_TEXT);
	tok->text = buf;
	tok->len = len;
	emit_token(tok);
	return len;
}
/*
 * Split the input buffer into tokens and append them to the token list.
 *
 * buf is the input text (not NUL-terminated), size its length in bytes.
 * Text tokens point into buf, so buf must stay valid for as long as the
 * token list is used.  cur_line is advanced on every newline.
 *
 * A backslash escapes the following character, which is then emitted
 * as plain text.  An escaped backslash, and a lone backslash at the end
 * of a line or of the input, is emitted as "\\\\" (two backslashes)
 * because nroff does its own escaping.
 */
static void tokenize(const char *buf, int size)
{
	int pos = 0;

	while (pos < size) {
		struct token *tok;
		int ch = buf[pos++];

		switch (ch) {
		case '@':
			pos += emit_keyword(buf + pos, size - pos);
			break;
		case '`':
			emit(TOK_ITALIC);
			break;
		case '*':
			emit(TOK_BOLD);
			break;
		case '\n':
			emit(TOK_NL);
			cur_line++;
			break;
		case '\t':
			emit(TOK_INDENT);
			break;
		case '\\':
			tok = new_token(TOK_TEXT);
			if (pos == size || buf[pos] == '\n') {
				// just one '\\': nothing to escape.  check this
				// before looking at buf[pos] so that we never
				// read past the buffer, and leave the newline
				// for the next iteration so it is tokenized
				// (and counted) normally
				tok->text = "\\\\";
				tok->len = 2;
			} else if (buf[pos] == '\\') {
				// escaped backslash
				tok->text = "\\\\";
				tok->len = 2;
				pos++;
			} else {
				// any other escaped character is literal text
				tok->text = buf + pos;
				tok->len = 1;
				pos++;
			}
			emit_token(tok);
			break;
		default:
			// ordinary text starts at the character just read
			pos--;
			pos += emit_text(buf + pos, size - pos);
			break;
		}
	}
}
/*
 * Return 1 if the line starting at tok consists only of indentation and
 * text made of spaces (up to the next newline or the end of the list),
 * 0 otherwise.
 */
static int is_empty_line(const struct token *tok)
{
	const struct token *cur;

	for (cur = tok; cur != &head; cur = cur->next) {
		int i;

		if (cur->type == TOK_NL)
			return 1;
		if (cur->type == TOK_INDENT)
			continue;
		if (cur->type != TOK_TEXT)
			return 0;

		// text counts as empty only if it is all spaces
		for (i = 0; i < cur->len; i++) {
			if (cur->text[i] != ' ')
				return 0;
		}
	}
	return 1;
}
/*
 * Free all tokens from tok up to and including the next newline token
 * (or up to the end of the list).
 *
 * Returns the first token after the removed line.
 */
static struct token *remove_line(struct token *tok)
{
	int removed_nl = 0;

	while (tok != &head && !removed_nl) {
		struct token *doomed = tok;

		removed_nl = (doomed->type == TOK_NL);
		tok = doomed->next;
		free_token(doomed);
	}
	return tok;
}
/*
 * Skip the body of a @pre / @raw block.
 *
 * tok is the first token after the opening keyword and type is the
 * matching end keyword (TOK_ENDPRE or TOK_ENDRAW).  Returns the token
 * following the newline that terminates the end keyword.
 *
 * Syntax errors (no return): another keyword inside the block, the end
 * keyword not followed by a newline, or the end keyword missing.
 */
static struct token *skip_after(struct token *tok, int type)
{
	struct token *save = tok;

	while (tok != &head) {
		if (tok->type == type) {
			struct token *after = tok->next;

			// report the end keyword's own line: `after` may be
			// the list sentinel, which carries no line number
			if (after->type != TOK_NL)
				syntax(tok->line, "newline expected after @%s\n",
						keyword_name(type));
			return after->next;
		}
		if (tok->type >= TOK_H1)
			// type - 1 is the opening keyword (enum order)
			syntax(tok->line, "keywords not allowed between @%s and @%s\n",
					keyword_name(type-1), keyword_name(type));
		tok = tok->next;
	}
	// save->prev is the opening keyword
	syntax(save->prev->line, "missing @%s\n", keyword_name(type));
}
/*
 * Return the first token after the next newline token at or after tok,
 * or the list sentinel if there is no further newline.
 */
static struct token *get_next_line(struct token *tok)
{
	while (tok != &head) {
		struct token *cur = tok;

		tok = cur->next;
		if (cur->type == TOK_NL)
			break;
	}
	return tok;
}
/*
 * Skip the run of indent tokens starting at tok.
 *
 * Stores the number of skipped indent tokens in *ip and returns the
 * first token that is not an indent (possibly the list sentinel).
 */
static struct token *get_indent(struct token *tok, int *ip)
{
	int depth;

	for (depth = 0; tok != &head && tok->type == TOK_INDENT; depth++)
		tok = tok->next;

	*ip = depth;
	return tok;
}
/*
 * Validate one line of tokens.
 *
 * tok is the first token of the line; the line must be non-empty.
 * The number of leading indent tokens is stored in *ip and the first
 * token after the indentation is returned.
 *
 * Which tokens may follow depends on the first non-indent token of the
 * line; any violation is reported through syntax(), which does not
 * return.
 */
static struct token *check_line(struct token *tok, int *ip)
{
struct token *start;
int tok_type;
start = tok = get_indent(tok, ip);
tok_type = tok->type;
switch (tok_type) {
// ordinary paragraph line: only text, formatting, @br and tabs allowed
case TOK_TEXT:
case TOK_BOLD:
case TOK_ITALIC:
case TOK_BR:
tok = tok->next;
while (tok != &head) {
switch (tok->type) {
case TOK_TEXT:
case TOK_BOLD:
case TOK_ITALIC:
case TOK_BR:
case TOK_INDENT:
break;
case TOK_NL:
return start;
default:
syntax(tok->line, "@%s not allowed inside paragraph\n",
keyword_name(tok->type));
}
tok = tok->next;
}
break;
// headings and the title must not be indented and take plain text only
case TOK_H1:
case TOK_H2:
case TOK_TITLE:
if (*ip)
goto indentation;
// check arguments
tok = tok->next;
while (tok != &head) {
switch (tok->type) {
case TOK_TEXT:
case TOK_INDENT:
break;
case TOK_NL:
return start;
default:
syntax(tok->line, "@%s can contain only text\n",
keyword_name(tok_type));
}
tok = tok->next;
}
break;
// list item: may be indented; text and bold/italic markers allowed
case TOK_LI:
// check arguments
tok = tok->next;
while (tok != &head) {
switch (tok->type) {
case TOK_TEXT:
case TOK_BOLD:
case TOK_ITALIC:
case TOK_INDENT:
break;
case TOK_NL:
return start;
default:
syntax(tok->line, "@%s not allowed inside @li\n",
keyword_name(tok->type));
}
tok = tok->next;
}
break;
case TOK_PRE:
// checked later
break;
case TOK_RAW:
// unlike @pre, @raw must not be indented
if (*ip)
goto indentation;
// checked later
break;
// an end keyword without its opening keyword
case TOK_ENDPRE:
case TOK_ENDRAW:
syntax(tok->line, "@%s not expected\n", keyword_name(tok->type));
break;
// cannot happen: the line is non-empty and indentation was skipped
case TOK_NL:
case TOK_INDENT:
BUG();
break;
}
return start;
indentation:
syntax(tok->line, "indentation before @%s\n", keyword_name(tok->type));
}
/*
 * Create a new newline token and link it into the list directly in
 * front of next.
 */
static void insert_nl_before(struct token *next)
{
	struct token *nl = new_token(TOK_NL);

	nl->next = next;
	nl->prev = next->prev;
	next->prev->next = nl;
	next->prev = nl;
}
/*
 * Validate and normalize the token list in place, line by line:
 *
 *  - empty lines are removed,
 *  - every remaining line is validated with check_line(),
 *  - consecutive text lines with the same indentation are joined into
 *    one line (the newline between them becomes a space, or is dropped
 *    next to @br),
 *  - an extra newline token is inserted before text, @pre or @raw that
 *    follows text or a @pre/@raw block and was separated from it by an
 *    empty line,
 *  - the bodies of @pre and @raw blocks are skipped with skip_after(),
 *  - white space directly after @h1, @h2, @li and @title is removed.
 */
static void normalize(void)
{
struct token *tok = head.next;
/*
 * >= 0 if previous line was text (== amount of indent)
 * -1 if previous block was @pre or @raw (amount of indent doesn't matter)
 * -2 otherwise (@h1 etc., indent was 0)
 */
int prev_indent = -2;
while (tok != &head) {
struct token *start;
int i, new_para = 0;
// remove empty lines
while (is_empty_line(tok)) {
tok = remove_line(tok);
new_para = 1;
if (tok == &head)
return;
}
// skips indent
// start: first token of the line, tok: first token after the indent,
// i: number of indent tokens
start = tok;
tok = check_line(tok, &i);
switch (tok->type) {
case TOK_TEXT:
case TOK_ITALIC:
case TOK_BOLD:
case TOK_BR:
// normal text
if (new_para && prev_indent >= -1) {
// previous line/block was text or @pre
// and there was an empty line after it
insert_nl_before(start);
}
if (!new_para && prev_indent == i) {
// join with previous line
struct token *nl = start->prev;
if (nl->type != TOK_NL)
BUG();
if ((nl->prev != &head && nl->prev->type == TOK_BR) ||
tok->type == TOK_BR) {
// don't convert \n after/before @br to ' '
free_token(nl);
} else {
// convert "\n" to " "
nl->type = TOK_TEXT;
nl->text = " ";
nl->len = 1;
}
// remove indent
while (start->type == TOK_INDENT) {
struct token *next = start->next;
free_token(start);
start = next;
}
}
prev_indent = i;
tok = get_next_line(tok);
break;
case TOK_PRE:
case TOK_RAW:
// these can be directly after normal text
// but not joined with the previous line
if (new_para && prev_indent >= -1) {
// previous line/block was text or @pre
// and there was an empty line after it
insert_nl_before(start);
}
// tok->type + 1 is the matching end keyword (enum order)
tok = skip_after(tok->next, tok->type + 1);
prev_indent = -1;
break;
case TOK_H1:
case TOK_H2:
case TOK_LI:
case TOK_TITLE:
// remove white space after H1, H2, LI and TITLE
tok = tok->next;
while (tok != &head) {
int type = tok->type;
struct token *next;
if (type == TOK_TEXT) {
// strip leading spaces; stop at the first text that
// still has content
while (tok->len && *tok->text == ' ') {
tok->text++;
tok->len--;
}
if (tok->len)
break;
}
if (type != TOK_INDENT)
break;
// TOK_INDENT
next = tok->next;
free_token(tok);
tok = next;
}
// not normal text. can't be joined
prev_indent = -2;
tok = get_next_line(tok);
break;
// cannot happen after check_line()
case TOK_NL:
case TOK_INDENT:
case TOK_ENDPRE:
case TOK_ENDRAW:
BUG();
break;
}
}
}
// printf-style write to the output stream (outfile)
#define output(...) fprintf(outfile, __VA_ARGS__)
/*
 * Write len raw bytes from buf to the output stream.
 */
static void output_buf(const char *buf, int len)
{
	fwrite(buf, sizeof(char), len, outfile);
}
/*
 * Write the text of tok to the output stream, putting a backslash in
 * front of every '-'.
 *
 * The escaped text is staged in a fixed-size buffer that is flushed
 * whenever it is about to fill up.
 */
static void output_text(struct token *tok)
{
	char buf[1024];
	const char *src = tok->text;
	int remaining = tok->len;
	int used = 0;

	while (remaining) {
		char c = *src++;

		// one input byte may need two output bytes, so flush while
		// there is still room for both
		if ((size_t)used >= sizeof(buf) - 1) {
			output_buf(buf, used);
			used = 0;
		}
		if (c == '-')
			buf[used++] = '\\';
		buf[used++] = c;
		remaining--;
	}
	if (used)
		output_buf(buf, used);
}
// output state shared by the output functions
// toggled by each bold marker in output_para(); 1 while bold is on
static int bold = 0;
// toggled by each italic marker in output_para(); 1 while italic is on
static int italic = 0;
// number of .RS levels currently open (maintained by dump_one())
static int indent = 0;
/*
 * Write a @pre block as a no-fill (.nf / .fi) region.
 *
 * tok is the token right after @pre and must be a newline.  Inside the
 * block ` and * are written literally, each non-empty line must have
 * the current indentation, and a line starting with '.' is protected
 * with "\&".
 *
 * Returns the token after @endpre (and after the newline following it,
 * if there is one).
 */
static struct token *output_pre(struct token *tok)
{
	int at_line_start = 1;

	if (tok->type != TOK_NL)
		syntax(tok->line, "newline expected after @pre\n");
	output(".nf\n");

	for (tok = tok->next; tok != &head; tok = tok->next) {
		if (at_line_start) {
			int depth;

			tok = get_indent(tok, &depth);
			if (depth != indent && tok->type != TOK_NL)
				syntax(tok->line, "indent changed in @pre\n");
		}

		if (tok->type == TOK_NL) {
			output("\n");
			at_line_start = 1;
			continue;
		}
		if (tok->type == TOK_ENDPRE) {
			struct token *rest = tok->next;

			output(".fi\n");
			if (rest != &head && rest->type == TOK_NL)
				rest = rest->next;
			return rest;
		}

		switch (tok->type) {
		case TOK_TEXT:
			if (at_line_start && tok->len && tok->text[0] == '.')
				output("\\&");
			output_text(tok);
			break;
		case TOK_ITALIC:
			output("`");
			break;
		case TOK_BOLD:
			output("*");
			break;
		case TOK_INDENT:
			// FIXME: warn
			output(" ");
			break;
		default:
			BUG();
			break;
		}
		at_line_start = 0;
	}
	return tok;
}
/*
 * Write a @raw block: everything up to @endraw is copied to the output
 * without man page escaping.
 *
 * tok is the token right after @raw and must be a newline.
 * Returns the token after @endraw (and after the newline following it,
 * if there is one).
 */
static struct token *output_raw(struct token *tok)
{
	if (tok->type != TOK_NL)
		syntax(tok->line, "newline expected after @raw\n");

	for (tok = tok->next; tok != &head; tok = tok->next) {
		switch (tok->type) {
		case TOK_ENDRAW:
			tok = tok->next;
			if (tok != &head && tok->type == TOK_NL)
				tok = tok->next;
			return tok;
		case TOK_TEXT:
			if (tok->len == 2 && !strncmp(tok->text, "\\\\", 2)) {
				/* ugly special case
				 * the tokenizer turned "\\" (\) into "\\\\" (\\)
				 * because nroff does escaping too; undo that here.
				 */
				output("\\");
			} else {
				output_buf(tok->text, tok->len);
			}
			break;
		case TOK_NL:
			output("\n");
			break;
		case TOK_ITALIC:
			output("`");
			break;
		case TOK_BOLD:
			output("*");
			break;
		case TOK_INDENT:
			output("\t");
			break;
		default:
			BUG();
			break;
		}
	}
	return tok;
}
static struct token *output_para(struct token *tok)
{
int bol = 1;
while (tok != &head) {
switch (tok->type) {
case TOK_TEXT:
output_text(tok);
break;
case TOK_ITALIC:
italic ^= 1;
if (italic) {
output("\\fI");
} else {
output("\\fR");
}
break;
case TOK_BOLD:
bold ^= 1;
if (bold) {
output("\\fB");
} else {
output("\\fR");
}
break;
case TOK_BR:
if (bol) {
output(".br\n");
} else {
output("\n.br\n");
}
bol = 1;
tok = tok->next;
continue;
case TOK_NL:
output("\n");
return tok->next;
case TOK_INDENT:
output(" ");
break;
default:
BUG();
break;
}
bol = 0;
tok = tok->next;
}
return tok;
}
/*
 * Write the request string cmd followed by the rest of the keyword's
 * line.  tok is the keyword token itself.
 *
 * Returns whatever output_para() returns for the tokens after tok.
 */
static struct token *title(struct token *tok, const char *cmd)
{
	struct token *args = tok->next;

	output("%s", cmd);
	return output_para(args);
}
/*
 * Write one line or block starting at tok and return the first token
 * that has not been written yet.
 *
 * Except for @raw blocks, the open .RS levels are first adjusted to
 * match the indentation of the line.
 */
static struct token *dump_one(struct token *tok)
{
	int depth;

	tok = get_indent(tok, &depth);
	if (tok->type != TOK_RAW) {
		for (; indent < depth; indent++)
			output(".RS\n");
		for (; indent > depth; indent--)
			output(".RE\n");
	}

	switch (tok->type) {
	case TOK_TEXT:
		// keep a leading '.' from being taken as a request
		if (tok->len && tok->text[0] == '.')
			output("\\&");
		return output_para(tok);
	case TOK_ITALIC:
	case TOK_BOLD:
	case TOK_BR:
		return output_para(tok);
	case TOK_H1:
		return title(tok, ".SH ");
	case TOK_H2:
		return title(tok, ".SS ");
	case TOK_LI:
		return title(tok, ".TP\n");
	case TOK_PRE:
		return output_pre(tok->next);
	case TOK_RAW:
		return output_raw(tok->next);
	case TOK_TITLE:
		tok = title(tok, ".TH ");
		// must be after .TH
		// no hyphenation, adjust left
		output(".nh\n.ad l\n");
		return tok;
	case TOK_NL:
		output("\n");
		return tok->next;
	case TOK_ENDPRE:
	case TOK_ENDRAW:
	case TOK_INDENT:
		BUG();
		break;
	}
	return tok;
}
/*
 * Write the whole token list to the output stream.
 */
static void dump(void)
{
	struct token *tok;

	for (tok = head.next; tok != &head; tok = dump_one(tok))
		;
}
/*
 * Read the input file, tokenize and normalize it, and write the
 * resulting man page to the output stream.
 *
 * Any failure to open, stat or map the input terminates the program
 * through die().  An empty input file produces no tokens.
 */
static void process(void)
{
	struct stat s;
	const char *buf;
	int fd;

	fd = open(filename, O_RDONLY);
	if (fd == -1)
		die("opening `%s' for reading: %s\n", filename, strerror(errno));
	// without this check a failed fstat() would leave the size unknown
	if (fstat(fd, &s) == -1)
		die("stat `%s': %s\n", filename, strerror(errno));
	if (s.st_size) {
		// mmap() rejects a zero length, hence the size check above
		buf = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
		if (buf == MAP_FAILED)
			die("mmap: %s\n", strerror(errno));
		tokenize(buf, s.st_size);
		normalize();
	}
	// the mapping stays valid after close() and is deliberately kept:
	// text tokens point into it until dump() has finished
	close(fd);
	dump();
}
/*
 * Usage: ttman <in> <out>
 *
 * Converts the text file <in> to a man page.  The output is written to
 * a temporary file next to <out> and renamed to <out> only after it has
 * been written completely, so a failed run never leaves a partial <out>.
 *
 * Returns 0 on success; exits with status 1 on any error.
 */
int main(int argc, char *argv[])
{
	const char *dest;
	int fd, n;

	program = argv[0];
	if (argc != 3) {
		fprintf(stderr, "Usage: %s <in> <out>\n", program);
		return 1;
	}
	filename = argv[1];
	dest = argv[2];

	n = snprintf(tmp_file, sizeof(tmp_file), "%s.XXXXXX", dest);
	if (n < 0 || (size_t)n >= sizeof(tmp_file)) {
		// the name was truncated: clear it so that quit() does not
		// unlink some unrelated path that happens to be a prefix
		tmp_file[0] = 0;
		die("output file name too long: %s\n", dest);
	}
	fd = mkstemp(tmp_file);
	if (fd < 0)
		die("creating %s: %s\n", tmp_file, strerror(errno));
	outfile = fdopen(fd, "w");
	if (!outfile)
		die("opening %s: %s\n", tmp_file, strerror(errno));

	process();

	// make sure everything really reached the file before it replaces
	// the destination; buffered write errors only show up here
	if (ferror(outfile))
		die("error writing %s\n", tmp_file);
	if (fclose(outfile))
		die("closing %s: %s\n", tmp_file, strerror(errno));

	if (rename(tmp_file, dest))
		die("renaming %s to %s: %s\n", tmp_file, dest, strerror(errno));
	return 0;
}