#!/usr/bin/env python3
# Source: https://tildegit.org/solderpunk/gemfeed
"""
Generate an Atom feed for a directory of text/gemini files.
"""

import argparse
from datetime import datetime, timezone
import os
from pathlib import Path
import re
import stat
import urllib.parse

# monkey-patch Gemini support in urllib.parse
# see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py
urllib.parse.uses_relative.append("gemini")
urllib.parse.uses_netloc.append("gemini")

file_extensions = ["gmi", "gemini"]
index_files = ["index.{}".format(extension) for extension in file_extensions]

# Leading YYYY-MM-DD date stamp in a file name.
_DATE_PREFIX_RE = re.compile(r"[0-9]{4}-[01][0-9]-[0-3][0-9]")
# Language tag placed directly before the extension, e.g. "post.en-US.gmi".
_LANGUAGE_SUFFIX_RE = re.compile(r"\.([a-z]{2}-[A-Z]{2})\.(?:gmi|gemini)$")


def is_world_readable(file):
    """
    Return True if the file is world readable, otherwise return False.

    Only the "others may read" permission bit is inspected.
    """
    return bool(os.stat(file).st_mode & stat.S_IROTH)


def extract_first_heading(file, default=""):
    """
    Open a file which is presumed to contain text/gemini content and return
    the contents of the first non-empty heading line (regardless of heading
    level).  If no such heading is found, return the specified default.

    The file is read as UTF-8, the default charset of text/gemini.
    """
    with open(file, encoding="utf-8") as f:
        for line in f:
            if line.startswith("#"):
                heading = line.lstrip("#").strip()
                # A heading marker without any text cannot serve as a title,
                # so keep looking.
                if heading:
                    return heading
    return default


def get_feed_title(directory):
    """
    If an index.gmi or index.gemini file exists and is world-readable,
    return the content of the first heading line in the file, otherwise
    return a default feed title.
    """
    # By default, use the deepest directory name as a feed title
    # This needs a little care, as os.path.basename will return an empty
    # string if `directory` ends in a trailing slash...
    head, default = os.path.split(directory)
    if not default:
        default = os.path.basename(head)
    # Check for index files which may override the default
    for index_file in index_files:
        index_path = os.path.join(directory, index_file)
        if os.path.exists(index_path) and is_world_readable(index_path):
            return extract_first_heading(index_path, default)
    return default


def find_files(directory, time_func, n, recursive):
    """
    Return the n most recent world-readable files with extensions of .gmi
    or .gemini, as a list of Path objects sorted from most to least recent.

    "Recent" is decided by get_update_time(), i.e. a date stamp in the file
    name wins over the timestamp reported by `time_func`.  Index files are
    never included.  Subdirectories are only searched when `recursive` is
    true.  If `n` is None all matching files are returned.
    """
    root = Path(directory)
    glob = root.rglob if recursive else root.glob
    candidates = []
    for extension in file_extensions:
        candidates.extend(glob("*.{}".format(extension)))
    files = [
        file for file in candidates
        if file.name not in index_files
        and file.is_file()
        and is_world_readable(file)
    ]
    # The path is a tie-breaker so that the order is deterministic when
    # several files share the same update time.
    files.sort(key=lambda file: (get_update_time(file, time_func), file),
               reverse=True)
    if n is not None:
        files = files[:max(n, 0)]
    return files


def populate_entry_from_file(entry, file, base_url, time_func, directory=None):
    """
    Set the id, title, updated and link attributes of the provided
    FeedGenerator entry object according the contents of the named
    Gemini file and the base URL.

    If `directory` is given, the entry URL keeps the file's path relative
    to that directory, so files found in subdirectories get correct links.
    Without it only the bare file name is appended to `base_url`.

    If the file name carries a language tag before its extension (for
    example "post.en-US.gmi"), the tag is used as the link's hreflang.
    """
    file = Path(file)
    relative = file.name
    if directory is not None:
        try:
            relative = file.relative_to(directory).as_posix()
        except ValueError:
            # `file` does not live below `directory`; fall back to its name.
            pass
    url = urllib.parse.urljoin(base_url, relative)
    entry.guid(url)
    link = {"href": url, "rel": "alternate", "type": "text/gemini"}
    language = _LANGUAGE_SUFFIX_RE.search(file.name)
    if language:
        link["hreflang"] = language.group(1)
    entry.link(**link)
    entry.updated(get_update_time(file, time_func))
    default_title = os.path.splitext(file.name)[0]
    entry.title(extract_first_heading(file, default_title))


def get_update_time(file, time_func):
    """
    Return an update time for a Gemini file as a timezone-aware datetime.

    If the filename begins with a valid ISO8601 date stamp, that date (with
    a time of 12:00 UTC) will be used.  Otherwise the timestamp returned by
    `time_func(file)` is used, interpreted as seconds since the epoch in
    UTC.
    """
    basename = Path(file).name
    if _DATE_PREFIX_RE.match(basename):
        try:
            return datetime.fromisoformat(basename[0:10] + "T12:00:00+00:00")
        except ValueError:
            # Looks like a date but is not one (e.g. 2024-13-40); use the
            # file timestamp instead of aborting the whole feed.
            pass
    return datetime.fromtimestamp(time_func(file), tz=timezone.utc)


def build_feed(directory, time_func, base_url, output, n, language, title="",
               subtitle="", author="", email="", verbose=False,
               recursive=False):
    """
    Build an Atom feed for the n most recent world readable Gemini files in
    `directory` and write it to the file `output` inside that directory.

    Nothing is written if no suitable files are found.
    """
    # Imported here rather than at module level so that the helper
    # functions and `--help` remain usable when feedgen is not installed.
    from feedgen.feed import FeedGenerator

    # If a title hasn't been provided, try to get one from an index page
    if not title:
        title = get_feed_title(directory)

    # Let user know feed title and URL
    feed_url = urllib.parse.urljoin(base_url, output)
    if verbose:
        print('Generating feed "{}", which should be served from {}'.format(title, feed_url))

    # Setup feed
    feed = FeedGenerator()
    feed.generator(generator="", version="", uri="")
    feed.id(base_url)
    feed.title(title)
    if subtitle:
        feed.subtitle(subtitle)
    feed.language(language)
    author_details = {}
    if author:
        author_details["name"] = author
    if email:
        author_details["email"] = email
    if author_details:
        feed.author(author_details)
    feed.link(href=feed_url, rel="self", type="application/atom+xml")
    feed.link(href=base_url, rel="alternate", type="text/gemini", hreflang=language)

    # Add one entry per .gmi file
    files = find_files(directory, time_func, n, recursive)
    if not files:
        if verbose:
            print("No world-readable Gemini content found")
        return
    # The feed as a whole was last updated when its newest entry was.
    feed.updated(get_update_time(files[0], time_func))
    # Entries are added oldest first: add_entry() is called with its default
    # ordering, as the original did, which leaves the newest entry first in
    # the output.
    for file in reversed(files):
        entry = feed.add_entry()
        populate_entry_from_file(entry, file, base_url, time_func, directory)
        if verbose:
            print("Adding {} with title '{}'...".format(file.name, entry.title()))

    # Write file
    output = os.path.join(directory, output)
    feed.atom_file(output, pretty=True)
    if verbose:
        print("Wrote Atom feed to {}.".format(output))


def main():
    """
    Parse command line arguments, do some minor processing, and then invoke
    the build_feed command with the provided settings.
    """
    # Get cwd as default value for --directory
    cwd = os.getcwd()

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
                        help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
                        required=True, help='base URL for feed and entries')
    parser.add_argument('-d', '--directory', dest='directory', type=str,
                        default=cwd, help='directory to find content and save feed to')
    parser.add_argument('-e', '--email', dest='email', type=str,
                        help="feed author's email address")
    parser.add_argument('-l', '--language', dest='language', type=str,
                        default="en-US", help="feed's language")
    parser.add_argument('-n', dest='n', type=int, default=10,
                        help='include N most recently created files in feed (default 10)')
    parser.add_argument('-o', '--output', dest='output', type=str,
                        default="atom.xml", help='output filename')
    parser.add_argument('-r', '--recursive', dest='recursive', action="store_true",
                        help='search Gemini files recursively in the given directory')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
                        help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
                        help='feed title')
    parser.add_argument('-v', '--verbose', dest='verbose', action="store_true",
                        help='Write progress to stdout')
    parser.add_argument('--mtime', action="store_true",
                        help='Use file modification time, not file update time, in feeds')
    args = parser.parse_args()

    # Normalise base URL
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)
    if not args.base_url.endswith("/"):
        args.base_url += "/"

    # Build the feed
    time_function = os.path.getmtime if args.mtime else os.path.getctime
    build_feed(args.directory, time_function, args.base_url, args.output,
               args.n, args.language, args.title, args.subtitle, args.author,
               args.email, args.verbose, args.recursive)


if __name__ == "__main__":
    main()

# gemini://michaelnordmeyer.com/files/gemfeed.py

-- Leo's gemini proxy

-- Connecting to michaelnordmeyer.com:1965...

-- Connected

-- Sending request

-- Meta line: 20 text/plain

-- Response ended

-- Page fetched on Mon Jun 3 00:54:01 2024