#!/usr/bin/env python3

# Source: https://tildegit.org/solderpunk/gemfeed

import argparse
from datetime import datetime, timezone
import os
# import os.path
from pathlib import Path
import re
import stat
import urllib.parse

from feedgen.feed import FeedGenerator

# Monkey-patch urllib.parse so that it treats "gemini" like the other
# schemes it knows to support relative references and a network location.
# see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py
urllib.parse.uses_relative.extend(["gemini"])
urllib.parse.uses_netloc.extend(["gemini"])

# Extensions of Gemini content files, and the index file names derived
# from them (index.gmi, index.gemini)
file_extensions = ["gmi", "gemini"]
index_files = ["index." + extension for extension in file_extensions]
# index_prefix = "index"

# index_files = []
# for extension in file_extensions:
#     index_files.extend("index.{}".format(extension))

def is_world_readable(file):
    """
    Return True if the file is world readable, otherwise return False.

    `file` is a path accepted by os.stat().  Only the "read by others"
    permission bit of the file mode is examined.  Any OSError raised by
    os.stat() (for example when the file does not exist) is propagated.
    """
    mode = os.stat(file).st_mode
    # The bitwise AND yields an int (0 or stat.S_IROTH); convert it so that
    # callers really get the documented True/False.
    return bool(mode & stat.S_IROTH)

def extract_first_heading(file, default=""):
    """
    Open a file which is presumed to contain text/gemini content and return
    the text of the first heading line (regardless of heading level), with
    the leading "#" characters and surrounding whitespace removed.

    Lines inside preformatted blocks (delimited by lines starting with
    "```") are not headings and are ignored, as are heading lines which
    carry no text.  If no usable heading is found, return the specified
    default.
    """
    preformatted = False
    # text/gemini defaults to UTF-8, so don't depend on the locale encoding
    with open(file, encoding="utf-8") as f:
        for line in f:
            if line.startswith("```"):
                # Toggle line: enters or leaves a preformatted block
                preformatted = not preformatted
                continue
            if preformatted or not line.startswith("#"):
                continue
            # lstrip copes with a line made only of "#" characters, which
            # would run off the end of the string if consumed char by char
            heading = line.lstrip("#").strip()
            if heading:
                return heading
    return default

def get_feed_title(directory):
    """
    Work out a title for the feed of `directory`.

    The first index file (index.gmi, then index.gemini) which exists and is
    world-readable supplies the title through its first heading line.  When
    there is no such file, or it has no heading, the name of the deepest
    directory in `directory` is returned instead.
    """
    # os.path.split yields an empty tail when `directory` ends in a
    # trailing slash; in that case the name comes from the head part.
    parent, fallback = os.path.split(directory)
    fallback = fallback or os.path.basename(parent)
    # An index file, if usable, overrides the directory name
    for name in index_files:
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            continue
        if not is_world_readable(candidate):
            continue
        return extract_first_heading(candidate, fallback)
    return fallback

def find_files(directory, time_func, n, recursive):
    """
    Return the n most recently updated world-readable files with extensions
    of .gmi or .gemini, as a list of Path objects sorted from least to most
    recent (build_feed relies on the newest file being the last one).

    directory -- directory to search
    time_func -- callable mapping a path to a timestamp; passed on to
                 get_update_time, which decides how recent a file is
    n         -- maximum number of files to return; None or a non-positive
                 value means "no limit"
    recursive -- if true, subdirectories are searched as well

    Index files (index.gmi, index.gemini) are never returned.
    """
    root = Path(directory)
    files = []
    for extension in file_extensions:
        pattern = "*.{}".format(extension)
        index_name = "index.{}".format(extension)
        matches = root.rglob(pattern) if recursive else root.glob(pattern)
        files.extend(
            path for path in matches
            if path.name != index_name
            # is_file() also rejects directories and dangling symlinks,
            # which os.stat() and open() would fail on
            and path.is_file()
            and is_world_readable(path)
        )
    # Oldest first; the path breaks ties so that the order is deterministic
    files.sort(key=lambda path: (get_update_time(path, time_func), path))
    if n is not None and n > 0:
        # The newest files are at the end of the list
        files = files[-n:]
    return files

def populate_entry_from_file(entry, file, base_url, time_func):
    """
    Set the id, title, updated and link attributes of the provided
    FeedGenerator entry object according the contents of the named
    Gemini file and the base URL.

    entry     -- feed entry object to fill in
    file      -- Path of the Gemini file the entry describes
    base_url  -- URL the file name is resolved against
    time_func -- callable mapping a path to a timestamp, passed on to
                 get_update_time

    A file name ending in ".xx-XX.gmi" or ".xx-XX.gemini" (for example
    "post.de-DE.gmi") marks the language of the file, which is then
    advertised as the hreflang of the entry's link.
    """
    # Percent-encode the name so that spaces, colons and non-ASCII
    # characters cannot produce an invalid or misparsed URL.
    # NOTE(review): only file.name is used, so a file found in a
    # subdirectory is linked as if it lived directly under base_url.
    url = urllib.parse.urljoin(base_url, urllib.parse.quote(file.name))
    entry.guid(url)
    # hreflang must be the language tag itself, not the match object
    language = re.search(r"\.([a-z]{2}-[A-Z]{2})\.(?:gmi|gemini)$", file.name)
    entry.link(href=url, rel="alternate", type="text/gemini",
            hreflang=language.group(1) if language else None)
    # published = get_publish_time(file, time_func)
    # entry.published(published)
    updated = get_update_time(file, time_func)
    entry.updated(updated)
    default_title = os.path.splitext(file.name)[0]
    # Never leave the title empty, even if the heading itself is empty
    title = extract_first_heading(file, default_title) or default_title
    entry.title(title)

def get_update_time(file, time_func):
    """
    Return an update time for a Gemini file, as a timezone-aware datetime
    in UTC.

    If the filename begins with a valid ISO8601 date stamp (YYYY-MM-DD),
    that date (with a time of 12:00 UTC) will be used.  Otherwise,
    including when the leading digits do not form a real calendar date,
    the timestamp returned by `time_func(file)` will be used instead as a
    best estimate.

    file      -- Path of the file; only its name is inspected here
    time_func -- callable mapping the path to a POSIX timestamp
    """
    # Check for leading YYYY-MM-DD
    basename = file.name
    if re.match("[0-9]{4}-[01][0-9]-[0-3][0-9]", basename):
        try:
            return datetime.fromisoformat(basename[0:10] + 'T12:00:00+00:00')
        except ValueError:
            # Shaped like a date but not one (e.g. 2024-13-40): fall
            # through to the file timestamp instead of aborting
            pass
    updated = time_func(file)
    return datetime.fromtimestamp(updated, tz=timezone.utc)

# def get_update_time(file):
#     """
#     Return an update time for a Gemini file.

#     The file modification time will be used.
#     """
#     updated = os.path.getmtime(file)
#     return datetime.fromtimestamp(round(updated), tz=timezone.utc)

def build_feed(directory, time_func, base_url, output, n, language,
        title="", subtitle="", author="", email="",
        verbose=False, recursive=False):
    """
    Build an Atom feed for the world readable Gemini files found in
    `directory`, and write it to the file `output` inside that directory.

    directory -- directory to find content in and to save the feed to
    time_func -- callable mapping a path to a timestamp, used to date files
    base_url  -- URL used as the feed id and as base for all other URLs
    output    -- file name of the feed, relative to `directory`
    n         -- passed to find_files as the number of files to include
    language  -- language of the feed
    title     -- feed title; if empty, it is derived via get_feed_title
    subtitle, author, email -- optional feed metadata, skipped when empty
    verbose   -- if true, report progress on stdout
    recursive -- passed to find_files to request a recursive search

    Nothing is written when no Gemini content is found.
    """
    # If a title hasn't been provided, try to get one from an index page
    if not title:
        title = get_feed_title(directory)

    # Let user know feed title and URL
    feed_url = urllib.parse.urljoin(base_url, output)
    if verbose:
        print('Generating feed "{}", which should be served from {}'.format(title, feed_url))

    # Setup feed
    feed = FeedGenerator()
    feed.generator(generator="", version="", uri="")
    feed.id(base_url)
    feed.title(title)
    if subtitle:
        feed.subtitle(subtitle)
    feed.language(language)
    author_details = {}
    if author:
        author_details["name"] = author
    if email:
        author_details["email"] = email
    if author_details:
        feed.author(author_details)
    feed.link(href=feed_url, rel="self", type="application/atom+xml")
    feed.link(href=base_url, rel="alternate", type="text/gemini", hreflang=language)

    # Add one entry per .gmi file
    files = find_files(directory, time_func, n, recursive)
    if not files:
        if verbose:
            print("No world-readable Gemini content found")
        return
    # The feed was last updated when its newest entry was; track the
    # maximum explicitly rather than trusting the order of `files`.
    latest_update = None
    for file in files:
        entry = feed.add_entry()
        populate_entry_from_file(entry, file, base_url, time_func)
        entry_updated = entry.updated()
        if latest_update is None or entry_updated > latest_update:
            latest_update = entry_updated
        if verbose:
            print("Adding {} with title '{}'...".format(file.name,
                entry.title()))
    feed.updated(latest_update)

    # Write file
    output = os.path.join(directory, output)
    feed.atom_file(output, pretty=True)
    if verbose:
        print("Wrote Atom feed to {}.".format(output))

def main():
    """
    Command line entry point: read the options, turn the base URL into a
    gemini:// URL ending in a slash, and pass everything on to build_feed.
    """
    # Command line options; --directory defaults to the working directory
    arg_parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    add = arg_parser.add_argument
    add('-a', '--author', dest='author', type=str,
        help="feed author's name")
    add('-b', '--base', dest='base_url', type=str, required=True,
        help='base URL for feed and entries')
    add('-d', '--directory', dest='directory', type=str, default=os.getcwd(),
        help='directory to find content and save feed to')
    add('-e', '--email', dest='email', type=str,
        help="feed author's email address")
    add('-l', '--language', dest='language', type=str, default="en-US",
        help="feed's language")
    add('-n', dest='n', type=int, default=10,
        help='include N most recently created files in feed (default 10)')
    add('-o', '--output', dest='output', type=str, default="atom.xml",
        help='output filename')
    add('-r', '--recursive', dest='recursive', action="store_true",
        help='search Gemini files recursively in the given directory')
    add('-s', '--subtitle', dest='subtitle', type=str,
        help='feed subtitle')
    add('-t', '--title', dest='title', type=str,
        help='feed title')
    add('-v', '--verbose', dest='verbose', action="store_true",
        help='Write progress to stdout')
    add('--mtime', action="store_true",
        help='Use file modification time, not file update time, in feeds')
    args = arg_parser.parse_args()

    # Canonical form of the base URL: gemini scheme, trailing slash
    parts = urllib.parse.urlsplit(args.base_url)
    if parts.path and not parts.netloc:
        # A naked domain is interpreted by urlsplit as a local path, so
        # move it to where it belongs
        parts = parts._replace(netloc=parts.path, path="")
    normalised = urllib.parse.urlunsplit(parts._replace(scheme="gemini"))
    if not normalised.endswith("/"):
        normalised += "/"
    args.base_url = normalised

    # Pick the timestamp source for files, then generate the feed
    if args.mtime:
        time_function = os.path.getmtime
    else:
        time_function = os.path.getctime
    build_feed(
        directory=args.directory,
        time_func=time_function,
        base_url=args.base_url,
        output=args.output,
        n=args.n,
        language=args.language,
        title=args.title,
        subtitle=args.subtitle,
        author=args.author,
        email=args.email,
        verbose=args.verbose,
        recursive=args.recursive,
    )

# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
# Retrieved from gemini://michaelnordmeyer.com/files/gemfeed.py through
# Leo's gemini proxy (meta line: 20 text/plain; page fetched on
# Mon Jun  3 00:54:01 2024).