git.strcat.st

/strcat/sgit.git/ - summary | tree | log | archive

subject
add RAW file option, binary file handling, nicer size format
commit
1f5a4a04414c81268508fb74fadd4783a0a42175
date
2026-04-26T16:06:06Z
message
diff
 sgit.c | 447 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 434 insertions(+), 13 deletions(-)

diff --git a/sgit.c b/sgit.c
index 7544caf..d670231 100644
--- a/sgit.c
+++ b/sgit.c
@@ -4,6 +4,7 @@
 #include <sys/stat.h>
 
 #include <dirent.h>
+#include <ctype.h>
 #include <errno.h>
 #include <limits.h>
 #include <stdarg.h>
@@ -45,6 +46,7 @@ struct tree_node {
 };
 
 static const char *progname;
+static int raw_mode;
 
 static void	 err(int, const char *, ...);
 static void	 errx(int, const char *, ...);
@@ -54,6 +56,8 @@ static void	 append_str(char **, size_t *, size_t *, const char *);
 static void	 add_repo(struct repo **, size_t *, size_t *, const char *);
 static void	 discover_repos(const char *, struct repo **, size_t *, size_t *);
 static void	 ensure_dir(const char *);
+static void	 ensure_parent_dir(const char *);
+static void	 clear_html_files(const char *);
 static void	 free_repos(struct repo *, size_t);
 static char	*join3(const char *, const char *, const char *);
 static void	 load_repo_meta(struct repo *);
@@ -62,16 +66,20 @@ static void	 make_commit_page(const struct repo *, const char *, const char *,
 static void	 make_files_page(const struct repo *, const char *);
 static char	*read_git_meta_file(const struct repo *, const char *);
 static void	 make_index(const char *, struct repo *, size_t);
-static void	 make_readme_page(const struct repo *, const char *);
+static void	 make_readme_page(const struct repo *, const char *, int);
 static void	 make_repo_page(const struct repo *);
 static char	*file_page_name(const char *);
+static int	 has_suffix_ci(const char *, const char *);
+static void	 render_markdown(FILE *, const char *);
+static void	 render_markdown_inline(FILE *, const char *);
 static void	 render_tree(FILE *, const struct repo *, struct tree_node *,
 	        int);
 static int	 repo_cmp(const void *, const void *);
+static size_t	 read_blob_size(const struct repo *, const char *);
 static char	*read_cache_head(const char *);
 static char	*read_cmd_output(const char *, ...);
 static char	*read_file_head(const struct repo *, const char *);
-static char	*read_readme(const struct repo *);
+static char	*read_readme(const struct repo *, int *);
 static char	*repo_basename(const char *);
 static int	 repo_changed(struct repo *);
 static int	 repo_has_git(const char *);
@@ -79,6 +87,10 @@ static char	*split_field(char **);
 static int	 stat_is_dir(const char *);
 static char	*str_dup(const char *);
 static char	*trim_newline(char *);
+static size_t	 text_loc(const char *);
+static int	 text_content_p(const char *, size_t, size_t);
+static void	 format_size(size_t, char *, size_t);
+static void	 write_raw_blob(const struct repo *, const char *);
 static struct tree_node *tree_add_path(struct tree_node *, const char *);
 static int	 tree_cmp(const void *, const void *);
 static void	 tree_free(struct tree_node *);
@@ -87,6 +99,7 @@ static struct tree_node *tree_root_from_files(const char *);
 static void	 usage(void);
 static void	 write_cache_head(const char *, const char *);
 static void	 write_html_escaped(FILE *, const char *);
+static void	 write_html_escaped_len(FILE *, const char *, size_t);
 static char	*write_shell_quoted(const char *);
 
 int
@@ -94,6 +107,7 @@ main(int argc, char *argv[])
 {
 	char *argv0;
 	const char *input, *output;
+	const char *raw;
 	struct repo *repos;
 	size_t cap, i, nrepos;
 	int ch, multi;
@@ -108,6 +122,10 @@ main(int argc, char *argv[])
 	argc -= optind;
 	argv += optind;
 	progname = argv0;
+	raw = getenv("RAW");
+	raw_mode = 0;
+	if (raw != NULL && strcmp(raw, "1") == 0)
+	    raw_mode = 1;
 	if (argc == 0) {
 	    input = "repos";
 	    output = "out";
@@ -248,6 +266,66 @@ trim_newline(char *s)
 	return s;
 }
 
+/*
+ * Count the lines in the NUL-terminated text s.
+ *
+ * Each '\n' terminates one line, and a trailing run of bytes without a
+ * final newline counts as one more line.  A terminating newline does
+ * not start an extra, empty line: "a\nb\n" and "a\nb" both have two
+ * lines.  The empty string has none.
+ */
+static size_t
+text_loc(const char *s)
+{
+	size_t loc;
+	char last;
+
+	loc = 0;
+	last = '\n';
+	for (; *s != '\0'; s++) {
+	    last = *s;
+	    if (last == '\n')
+	        loc++;
+	}
+	/* The final line still counts when it lacks its newline. */
+	if (last != '\n')
+	    loc++;
+	return loc;
+}
+
+/*
+ * Decide whether the nbytes bytes at s look like displayable text.
+ *
+ * Returns 0 when nbytes is smaller than blob_size, i.e. the buffer
+ * holds less than the whole blob (presumably because it was cut short,
+ * e.g. at an embedded NUL -- confirm against the caller).  Otherwise
+ * returns 1 only if every byte is a newline, carriage return, tab,
+ * printable ASCII (0x20-0x7e) or a high byte (>= 0x80, so multi-byte
+ * encodings pass).  Any other control byte, DEL (0x7f) included, makes
+ * the result 0.
+ */
+static int
+text_content_p(const char *s, size_t nbytes, size_t blob_size)
+{
+	size_t i;
+	unsigned char c;
+
+	if (nbytes < blob_size)
+	    return 0;
+	for (i = 0; i < nbytes; i++) {
+	    c = (unsigned char)s[i];
+	    if (c == '\n' || c == '\r' || c == '\t')
+	        continue;
+	    /* 0x20-0x7e is printable ASCII; 0x7f (DEL) is a control byte. */
+	    if ((c >= 0x20 && c < 0x7f) || c >= 0x80)
+	        continue;
+	    return 0;
+	}
+	return 1;
+}
+
+/*
+ * Format nbytes as a short human-readable size into buf, writing at
+ * most buflen bytes (snprintf NUL-terminates when buflen > 0).
+ *
+ * Values below 1024 are printed exactly as "<n> bytes".  Larger values
+ * are scaled by powers of 1024 and printed with about three
+ * significant digits: "1.50 kb", "10.0 kb", "100 kb".  Precision and
+ * unit are picked from the value as it will be rounded, so the output
+ * never reads "100.0 kb", "10.00 kb" or "1024 kb".
+ */
+static void
+format_size(size_t nbytes, char *buf, size_t buflen)
+{
+	/* units[0] is a placeholder: the index equals the power of 1024. */
+	static const char *const units[] = { "bytes", "kb", "mb", "gb", "tb",
+	    "pb" };
+	double v;
+	size_t i, nunits;
+	int prec;
+
+	nunits = sizeof(units) / sizeof(units[0]);
+	if (nbytes < 1024) {
+	    snprintf(buf, buflen, "%lu bytes", (unsigned long)nbytes);
+	    return;
+	}
+	v = (double)nbytes;
+	i = 0;
+	while (v >= 1024.0 && i + 1 < nunits) {
+	    v /= 1024.0;
+	    i++;
+	}
+	/* "%.0f" would round this up to 1024; use the next unit instead. */
+	if (v >= 1023.5 && i + 1 < nunits) {
+	    v /= 1024.0;
+	    i++;
+	}
+	/* Thresholds sit where the finer format would round up a digit. */
+	if (v >= 99.95)
+	    prec = 0;
+	else if (v >= 9.995)
+	    prec = 1;
+	else
+	    prec = 2;
+	snprintf(buf, buflen, "%.*f %s", prec, v, units[i]);
+}
+
 static int
 stat_is_dir(const char *path)
 {
@@ -508,6 +586,52 @@ ensure_dir(const char *path)
 	    err(1, "mkdir %s", buf);
 }
 
+/*
+ * Make sure the directory that would contain path exists.
+ *
+ * Works on a private copy of path: the copy is cut at its last '/' and
+ * the remaining prefix is handed to ensure_dir().  Nothing is created
+ * when path contains no '/' or when its last '/' is the first
+ * character (the prefix would be empty).
+ */
+static void
+ensure_parent_dir(const char *path)
+{
+	char *parent, *slash;
+
+	if ((parent = str_dup(path)) == NULL)
+	    err(1, "malloc");
+	slash = strrchr(parent, '/');
+	if (slash != NULL && slash != parent) {
+	    *slash = '\0';
+	    ensure_dir(parent);
+	}
+	free(parent);
+}
+
+/*
+ * Remove every "<name>.html" entry directly inside dir.
+ *
+ * Only names with at least one character in front of ".html" are
+ * touched, and subdirectories are not descended into.  A directory
+ * that cannot be opened is silently ignored.  A failed unlink (other
+ * than ENOENT) or closedir is reported with warn() and does not stop
+ * the sweep.
+ */
+static void
+clear_html_files(const char *dir)
+{
+	static const char ext[] = ".html";
+	const size_t extlen = sizeof(ext) - 1;
+	const char *name;
+	struct dirent *ent;
+	char *victim;
+	size_t namelen;
+	DIR *d;
+
+	if ((d = opendir(dir)) == NULL)
+	    return;
+	for (;;) {
+	    ent = readdir(d);
+	    if (ent == NULL)
+	        break;
+	    name = ent->d_name;
+	    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+	        continue;
+	    namelen = strlen(name);
+	    if (namelen <= extlen)
+	        continue;
+	    if (memcmp(name + namelen - extlen, ext, extlen) != 0)
+	        continue;
+	    victim = join3(dir, "/", name);
+	    if (victim == NULL)
+	        err(1, "malloc");
+	    if (unlink(victim) == -1 && errno != ENOENT)
+	        warn("unlink %s", victim);
+	    free(victim);
+	}
+	if (closedir(d) == -1)
+	    warn("closedir %s", dir);
+}
+
 static char *
 read_cache_head(const char *outdir)
 {
@@ -642,7 +766,15 @@ repo_changed(struct repo *r)
 static void
 write_html_escaped(FILE *fp, const char *s)
 {
-	for (; *s != '\0'; s++) {
+	write_html_escaped_len(fp, s, strlen(s));
+}
+
+static void
+write_html_escaped_len(FILE *fp, const char *s, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
 	    switch (*s) {
 	    case '&':
 	        fputs("&amp;", fp);
@@ -660,6 +792,7 @@ write_html_escaped(FILE *fp, const char *s)
 	        fputc((unsigned char)*s, fp);
 	        break;
 	    }
+	    s++;
 	}
 }
 
@@ -735,13 +868,14 @@ make_commit_page(const struct repo *r, const char *hash, const char *short_hash,
 }
 
 static char *
-read_readme(const struct repo *r)
+read_readme(const struct repo *r, int *is_md)
 {
 	const char *names[] = { "README.md", "README", "README.txt", "readme.md",
 	    "readme", NULL };
 	char *out, *qname, *qpath;
 	size_t i;
 
+	*is_md = 0;
 	qpath = write_shell_quoted(r->path);
 	if (qpath == NULL)
 	    err(1, "malloc");
@@ -755,6 +889,7 @@ read_readme(const struct repo *r)
 	    if (out == NULL)
 	        continue;
 	    if (out[0] != '\0') {
+	        *is_md = has_suffix_ci(names[i], ".md");
 	        free(qpath);
 	        return out;
 	    }
@@ -764,6 +899,183 @@ read_readme(const struct repo *r)
 	return str_dup("");
 }
 
+/*
+ * Return 1 if s ends with suffix, comparing bytes case-insensitively
+ * via tolower(); return 0 otherwise.  An empty suffix matches every
+ * string.
+ */
+static int
+has_suffix_ci(const char *s, const char *suffix)
+{
+	const char *tail;
+	size_t slen, suflen;
+
+	slen = strlen(s);
+	suflen = strlen(suffix);
+	if (slen < suflen)
+	    return 0;
+	/* Walk the last suflen bytes of s in step with suffix. */
+	tail = s + (slen - suflen);
+	while (*suffix != '\0') {
+	    if (tolower((unsigned char)*tail) !=
+	        tolower((unsigned char)*suffix))
+	        return 0;
+	    tail++;
+	    suffix++;
+	}
+	return 1;
+}
+
+/*
+ * Report whether the link target url[0..len) uses a scheme that can
+ * carry script: javascript:, vbscript: or data:.
+ *
+ * The scheme is extracted leniently, in the manner of the WHATWG URL
+ * parsing rules: leading spaces and control bytes are skipped, tab, CR
+ * and LF inside the scheme are ignored, and the comparison is
+ * case-insensitive.  Targets without a scheme (relative links,
+ * fragments) and all other schemes yield 0.
+ */
+static int
+markdown_url_blocked_p(const char *url, size_t len)
+{
+	static const char *const blocked[] = { "javascript", "vbscript",
+	    "data" };
+	char scheme[16];
+	size_t i, j, n;
+	unsigned char c;
+
+	i = 0;
+	while (i < len && (unsigned char)url[i] <= 0x20)
+	    i++;
+	n = 0;
+	for (; i < len; i++) {
+	    c = (unsigned char)url[i];
+	    if (c == '\t' || c == '\r' || c == '\n')
+	        continue;
+	    if (c == ':')
+	        break;
+	    /* Not a scheme character, so the target has no scheme. */
+	    if (!isalnum(c) && c != '+' && c != '-' && c != '.')
+	        return 0;
+	    /* Longer than every blocked name: cannot match. */
+	    if (n + 1 >= sizeof(scheme))
+	        return 0;
+	    scheme[n++] = (char)tolower(c);
+	}
+	if (i == len)
+	    return 0;
+	scheme[n] = '\0';
+	for (j = 0; j < sizeof(blocked) / sizeof(blocked[0]); j++) {
+	    if (strcmp(scheme, blocked[j]) == 0)
+	        return 1;
+	}
+	return 0;
+}
+
+/*
+ * Write one line of Markdown text to fp as HTML, handling the inline
+ * constructs `code` and [text](url); everything else is HTML-escaped
+ * and copied through.
+ *
+ * An unmatched backtick, or a '[' that does not start a complete
+ * [text](url), is emitted literally.  Code spans and link text are
+ * escaped but not parsed for further markup.  A link whose target is
+ * rejected by markdown_url_blocked_p() is rendered as its plain text
+ * without an <a> element, since the text being rendered comes from
+ * repository content.
+ */
+static void
+render_markdown_inline(FILE *fp, const char *line)
+{
+	const char *code_end, *link_close, *link_end, *p, *url_start;
+	size_t n, url_len;
+	int linked;
+
+	p = line;
+	while (*p != '\0') {
+	    if (*p == '`') {
+	        code_end = strchr(p + 1, '`');
+	        if (code_end != NULL) {
+	            fputs("<code>", fp);
+	            write_html_escaped_len(fp, p + 1,
+	                (size_t)(code_end - (p + 1)));
+	            fputs("</code>", fp);
+	            p = code_end + 1;
+	            continue;
+	        }
+	    }
+	    if (*p == '[') {
+	        link_close = strchr(p + 1, ']');
+	        if (link_close != NULL && *(link_close + 1) == '(') {
+	            url_start = link_close + 2;
+	            link_end = strchr(url_start, ')');
+	            if (link_end != NULL) {
+	                url_len = (size_t)(link_end - url_start);
+	                linked = !markdown_url_blocked_p(url_start, url_len);
+	                if (linked) {
+	                    fputs("<a href=\"", fp);
+	                    write_html_escaped_len(fp, url_start, url_len);
+	                    fputs("\">", fp);
+	                }
+	                write_html_escaped_len(fp, p + 1,
+	                    (size_t)(link_close - (p + 1)));
+	                if (linked)
+	                    fputs("</a>", fp);
+	                p = link_end + 1;
+	                continue;
+	            }
+	        }
+	    }
+	    /*
+	     * Copy plain text up to the next special character.  A special
+	     * character that did not form a construct above is copied on
+	     * its own so the scan always advances.
+	     */
+	    n = strcspn(p, "`[");
+	    if (n == 0)
+	        n = 1;
+	    write_html_escaped_len(fp, p, n);
+	    p += n;
+	}
+}
+
+/*
+ * Close the paragraph and/or list currently open on fp and clear the
+ * matching state flags.
+ */
+static void
+markdown_close_blocks(FILE *fp, int *in_p, int *in_list)
+{
+	if (*in_p) {
+	    fputs("</p>\n", fp);
+	    *in_p = 0;
+	}
+	if (*in_list) {
+	    fputs("</ul>\n", fp);
+	    *in_list = 0;
+	}
+}
+
+/*
+ * Render the Markdown text md to fp as HTML, one line at a time.
+ *
+ * Supported block constructs:
+ *   ```            toggles a <pre> block; lines inside are escaped and
+ *                  written verbatim, indentation included
+ *   #, ##, ...     headings <h1> to <h6>
+ *   "- " or "* "   items of an unordered list
+ *   blank line     ends the current paragraph or list
+ *   anything else  paragraph text; consecutive lines are joined by <br>
+ *
+ * Outside code blocks, leading spaces and tabs are ignored.  Inline
+ * markup is handled by render_markdown_inline().  Blocks still open at
+ * the end of the input are closed.  md itself is not modified; the
+ * function works on a copy and calls err() if the copy cannot be made.
+ */
+static void
+render_markdown(FILE *fp, const char *md)
+{
+	char *line, *next, *raw, *save;
+	int in_code, in_list, in_p, level;
+
+	save = str_dup(md);
+	if (save == NULL)
+	    err(1, "malloc");
+	in_code = 0;
+	in_list = 0;
+	in_p = 0;
+	for (line = save; line != NULL; line = next) {
+	    /* Split the current line off in place. */
+	    next = strchr(line, '\n');
+	    if (next != NULL)
+	        *next++ = '\0';
+	    raw = line;
+	    while (*line == ' ' || *line == '\t')
+	        line++;
+	    if (strncmp(line, "```", 3) == 0) {
+	        markdown_close_blocks(fp, &in_p, &in_list);
+	        fputs(in_code ? "</pre>\n" : "<pre>", fp);
+	        in_code = !in_code;
+	        continue;
+	    }
+	    if (in_code) {
+	        /* Use the unstripped line: indentation is part of code. */
+	        write_html_escaped(fp, raw);
+	        fputc('\n', fp);
+	        continue;
+	    }
+	    if (*line == '\0') {
+	        markdown_close_blocks(fp, &in_p, &in_list);
+	        continue;
+	    }
+	    if (*line == '#') {
+	        markdown_close_blocks(fp, &in_p, &in_list);
+	        /* At least one '#' is present, so level is 1..6. */
+	        level = 0;
+	        while (*line == '#' && level < 6) {
+	            level++;
+	            line++;
+	        }
+	        while (*line == ' ')
+	            line++;
+	        fprintf(fp, "<h%d>", level);
+	        render_markdown_inline(fp, line);
+	        fprintf(fp, "</h%d>\n", level);
+	        continue;
+	    }
+	    if ((line[0] == '-' || line[0] == '*') && line[1] == ' ') {
+	        if (in_p) {
+	            fputs("</p>\n", fp);
+	            in_p = 0;
+	        }
+	        if (!in_list) {
+	            fputs("<ul>\n", fp);
+	            in_list = 1;
+	        }
+	        fputs("<li>", fp);
+	        render_markdown_inline(fp, line + 2);
+	        fputs("</li>\n", fp);
+	        continue;
+	    }
+	    /* Paragraph text ends a list; <p> must not sit inside <ul>. */
+	    if (in_list) {
+	        fputs("</ul>\n", fp);
+	        in_list = 0;
+	    }
+	    if (!in_p) {
+	        fputs("<p>", fp);
+	        in_p = 1;
+	    } else
+	        fputs("<br>\n", fp);
+	    render_markdown_inline(fp, line);
+	}
+	if (in_code)
+	    fputs("</pre>\n", fp);
+	markdown_close_blocks(fp, &in_p, &in_list);
+	free(save);
+}
+
 static char *
 read_git_meta_file(const struct repo *r, const char *name)
 {
@@ -799,7 +1111,7 @@ read_git_meta_file(const struct repo *r, const char *name)
 }
 
 static void
-make_readme_page(const struct repo *r, const char *readme)
+make_readme_page(const struct repo *r, const char *readme, int readme_md)
 {
 	char *html;
 	FILE *fp;
@@ -842,9 +1154,13 @@ make_readme_page(const struct repo *r, const char *readme)
 
 	if (readme[0] != '\0') {
 	    fputs("<h2>readme</h2>\n", fp);
-	    fputs("<pre>", fp);
-	    write_html_escaped(fp, readme);
-	    fputs("</pre>\n", fp);
+	    if (readme_md)
+	        render_markdown(fp, readme);
+	    else {
+	        fputs("<pre>", fp);
+	        write_html_escaped(fp, readme);
+	        fputs("</pre>\n", fp);
+	    }
 	}
 	fputs("</body>\n</html>\n", fp);
 
@@ -892,6 +1208,73 @@ read_file_head(const struct repo *r, const char *path)
 	return out;
 }
 
+/*
+ * Return the size in bytes of the blob at path in the HEAD commit of
+ * repository r, as printed by "git cat-file -s".
+ *
+ * Returns 0 when read_cmd_output() returns NULL or when the output is
+ * not a plain decimal number that fits an unsigned long; an empty blob
+ * yields 0 as well.  Calls err(1, ...) on allocation failure.
+ */
+static size_t
+read_blob_size(const struct repo *r, const char *path)
+{
+	char *end, *out, *qpath, *qspec, *spec;
+	unsigned long n;
+	int valid;
+
+	spec = join3("HEAD:", "", path);
+	if (spec == NULL)
+	    err(1, "malloc");
+	qpath = write_shell_quoted(r->path);
+	qspec = write_shell_quoted(spec);
+	free(spec);
+	if (qpath == NULL || qspec == NULL)
+	    err(1, "malloc");
+	out = read_cmd_output("git -C %s cat-file -s %s 2>/dev/null", qpath,
+	    qspec);
+	free(qpath);
+	free(qspec);
+	if (out == NULL)
+	    return 0;
+	trim_newline(out);
+	errno = 0;
+	n = strtoul(out, &end, 10);
+	/*
+	 * Judge the conversion before releasing the buffer: end points
+	 * into out, and free() is allowed to disturb errno.
+	 */
+	valid = errno == 0 && end != out && *end == '\0';
+	free(out);
+	if (!valid)
+	    return 0;
+	return (size_t)n;
+}
+
+/*
+ * Export the blob at path in the HEAD commit of repository r to the
+ * file "<outdir>/raw/<path>".
+ *
+ * The parent directory of the target is created first.  The blob is
+ * then written by running "git cat-file blob" through system() with
+ * its output redirected to the target; all three arguments are passed
+ * through write_shell_quoted().  A non-zero result from system() is
+ * reported with warn(); allocation failures go to err().
+ */
+static void
+write_raw_blob(const struct repo *r, const char *path)
+{
+	static const char fmt[] =
+	    "git -C %s cat-file blob %s > %s 2>/dev/null";
+	char *command, *object, *qobject, *qrepo, *qtarget, *target;
+	int len;
+
+	target = join3(r->outdir, "/raw/", path);
+	if (target == NULL)
+	    err(1, "malloc");
+	ensure_parent_dir(target);
+
+	qrepo = write_shell_quoted(r->path);
+	object = join3("HEAD:", "", path);
+	if (qrepo == NULL || object == NULL)
+	    err(1, "malloc");
+	qobject = write_shell_quoted(object);
+	qtarget = write_shell_quoted(target);
+	free(object);
+	if (qobject == NULL || qtarget == NULL)
+	    err(1, "malloc");
+
+	/* First pass measures the command, second pass builds it. */
+	len = snprintf(NULL, 0, fmt, qrepo, qobject, qtarget);
+	if (len < 0)
+	    errx(1, "snprintf");
+	command = malloc((size_t)len + 1);
+	if (command == NULL)
+	    err(1, "malloc");
+	snprintf(command, (size_t)len + 1, fmt, qrepo, qobject, qtarget);
+
+	if (system(command) != 0)
+	    warn("write raw blob %s", path);
+
+	free(command);
+	free(target);
+	free(qtarget);
+	free(qrepo);
+	free(qobject);
+}
+
 static struct tree_node *
 tree_new(const char *name, const char *path, int is_dir)
 {
@@ -1036,11 +1419,33 @@ render_tree(FILE *fp, const struct repo *r, struct tree_node *n, int depth)
 {
 	char *blob, *html, *name;
 	char *content;
+	char hsize[64];
 	FILE *bfp;
-	size_t i;
+	size_t blob_size, i, loc, nbytes;
 
 	if (!n->is_dir) {
 	    name = file_page_name(n->path);
+	    if (raw_mode)
+	        write_raw_blob(r, n->path);
+	    blob_size = read_blob_size(r, n->path);
+	    content = read_file_head(r, n->path);
+	    nbytes = strlen(content);
+	    if (!text_content_p(content, nbytes, blob_size)) {
+	        if (raw_mode) {
+	            fputs("<li><a href=\"raw/", fp);
+	            write_html_escaped(fp, n->path);
+	            fputs("\"><code>", fp);
+	            write_html_escaped(fp, n->path);
+	            fputs("</code></a></li>\n", fp);
+	        } else {
+	            fputs("<li><code>", fp);
+	            write_html_escaped(fp, n->path);
+	            fputs("</code></li>\n", fp);
+	        }
+	        free(content);
+	        free(name);
+	        return;
+	    }
 	    fputs("<li><a href=\"file/", fp);
 	    write_html_escaped(fp, name);
 	    fputs("\"><code>", fp);
@@ -1051,7 +1456,7 @@ render_tree(FILE *fp, const struct repo *r, struct tree_node *n, int depth)
 	    if (blob == NULL)
 	        err(1, "malloc");
 	    html = blob;
-	    content = read_file_head(r, n->path);
+	    loc = text_loc(content);
 	    bfp = fopen(html, "w");
 	    if (bfp == NULL)
 	        err(1, "fopen %s", html);
@@ -1069,7 +1474,21 @@ render_tree(FILE *fp, const struct repo *r, struct tree_node *n, int depth)
 	        "<a href=\"../history.html\">history</a></p>\n<h1><code>",
 	        bfp);
 	    write_html_escaped(bfp, n->path);
-	    fputs("</code></h1>\n<pre>", bfp);
+	    fputs("</code></h1>\n<p class=\"meta\">", bfp);
+	    fputs("<span class=\"meta-k\">file</span> : ", bfp);
+	    write_html_escaped(bfp, n->path);
+	    fputs("<br><span class=\"meta-k\">loc</span> : ", bfp);
+	    fprintf(bfp, "%lu", (unsigned long)loc);
+	    fputs("<br><span class=\"meta-k\">size</span> : ", bfp);
+	    format_size(nbytes, hsize, sizeof(hsize));
+	    fprintf(bfp, "%lu bytes (%s)", (unsigned long)nbytes, hsize);
+	    if (raw_mode) {
+	        fputs("<br><span class=\"meta-k\">raw</span> : <a href=\"../raw/",
+	            bfp);
+	        write_html_escaped(bfp, n->path);
+	        fputs("\">raw</a>", bfp);
+	    }
+	    fputs("</p>\n<pre>", bfp);
 	    write_html_escaped(bfp, content);
 	    fputs("</pre>\n</body>\n</html>\n", bfp);
 	    if (fclose(bfp) == EOF)
@@ -1108,6 +1527,7 @@ make_files_page(const struct repo *r, const char *files)
 	if (dir == NULL)
 	    err(1, "malloc");
 	ensure_dir(dir);
+	clear_html_files(dir);
 	free(dir);
 
 	html = join3(r->outdir, "/", "files.html");
@@ -1146,6 +1566,7 @@ make_repo_page(const struct repo *r)
 	char *commits, *files, *full_hash, *history, *line, *qpath, *readme;
 	char *short_hash, *author, *date, *subject;
 	FILE *fp;
+	int readme_md;
 
 	qpath = write_shell_quoted(r->path);
 	if (qpath == NULL)
@@ -1163,11 +1584,11 @@ make_repo_page(const struct repo *r)
 	    err(1, "malloc");
 	free(qpath);
 
-	readme = read_readme(r);
+	readme = read_readme(r, &readme_md);
 	if (readme == NULL)
 	    err(1, "malloc");
 
-	make_readme_page(r, readme);
+	make_readme_page(r, readme, readme_md);
 	make_files_page(r, files);
 
 	history = join3(r->outdir, "/", "history.html");