From 27e5342e822900d11224bc89cf4e3df51b1108be Mon Sep 17 00:00:00 2001 From: Markus Birth Date: Wed, 5 Jan 2022 14:04:46 +0100 Subject: [PATCH] Initial commit --- .gitignore | 4 ++ Pipfile | 11 +++++ Pipfile.lock | 70 +++++++++++++++++++++++++++++++ README.md | 38 +++++++++++++++++ config.yaml.example | 33 +++++++++++++++ jekyll2s9y.py | 78 +++++++++++++++++++++++++++++++++++ jekyllreader/__init__.py | 2 + jekyllreader/jekyllarticle.py | 69 +++++++++++++++++++++++++++++++ jekyllreader/jekyllreader.py | 25 +++++++++++ s9ywriter/__init__.py | 2 + s9ywriter/s9yentry.py | 14 +++++++ s9ywriter/s9ywriter.py | 75 +++++++++++++++++++++++++++++++++ 12 files changed, 421 insertions(+) create mode 100644 .gitignore create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 README.md create mode 100644 config.yaml.example create mode 100755 jekyll2s9y.py create mode 100644 jekyllreader/__init__.py create mode 100644 jekyllreader/jekyllarticle.py create mode 100644 jekyllreader/jekyllreader.py create mode 100644 s9ywriter/__init__.py create mode 100644 s9ywriter/s9yentry.py create mode 100644 s9ywriter/s9ywriter.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cbc351 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.db +jekyll/ +uploads/ +/config.yaml diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..005d615 --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +python-frontmatter = "*" +pytz = "*" +pyyaml = "*" + +[dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..902a1d1 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,70 @@ +{ + "_meta": { + "hash": { + "sha256": "80e20d60ab55b386ef8a6294cfd3e9712ae0ea8aeb23582f66e8534f61361e74" + }, + "pipfile-spec": 6, + "requires": {}, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + 
"default": { + "python-frontmatter": { + "hashes": [ + "sha256:766ae75f1b301ffc5fe3494339147e0fd80bc3deff3d7590a93991978b579b08", + "sha256:e98152e977225ddafea6f01f40b4b0f1de175766322004c826ca99842d19a7cd" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "pytz": { + "hashes": [ + "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da", + "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798" + ], + "index": "pypi", + "version": "==2021.1" + }, + "pyyaml": { + "hashes": [ + "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf", + "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696", + "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393", + "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77", + "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922", + "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5", + "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8", + "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10", + "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc", + "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018", + "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e", + "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253", + "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347", + "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183", + "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541", + "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb", + "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185", + "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc", + "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db", + 
"sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa", + "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46", + "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122", + "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b", + "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63", + "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df", + "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc", + "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247", + "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6", + "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0" + ], + "index": "pypi", + "version": "==5.4.1" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..d61b428 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +jekyll2s9y importer +=================== + +This is a tool to import my Jekyll-based [GitHub Pages](https://pages.github.com) blog/wiki +into [Serendipity](https://s9y.org). + + +Preparations +------------ + +Setup Serendipity with an SQLite database. Download the database file to where you also plan to run jekyll2s9y. + + +Usage +----- + +Copy `config.yaml.example` to `config.yaml` and modify according to your needs. Note that +`jekyll_dir` has to point to your Jekyll base directory. + +If not done already, update the Python environment: + + pipenv install + +Then run the script: + + pipenv run ./jekyll2s9y.py + +Now copy the new database file (`s9y_database_output` in the config.yaml) back to your server and overwrite +the old version. Also copy the `uploads` directory containing the media files. Then, in the Serendipity admin +area, edit your configuration and change the permalink for "Entry URL structure" in any way (e.g. add a letter +to the end) to have the permalinks regenerated. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Import Jekyll articles into a copy of a Serendipity (s9y) SQLite database.

The script reads ``config.yaml`` from the current working directory and uses
these keys:

* ``general.jekyll_dir``          - directory handed to ``JekyllReader``
* ``general.s9y_database``        - source database file (left untouched)
* ``general.s9y_database_output`` - working copy that receives the new entries
* ``general.s9y_media_dir``       - base directory for copied image files
* ``general.timezone``            - pytz zone name; used for stamps without
  a UTC offset and to determine the year of the media sub-directory
* ``categories``                  - tags that become s9y categories; every
  other tag is kept as a tag
* ``s9y_defaults``                - attribute/value pairs set on every new entry
"""

from datetime import datetime
from os import makedirs
from os.path import basename
import shutil
import pytz
import yaml
import jekyllreader
import s9ywriter


with open("config.yaml", "rt", encoding="utf-8") as f:
    # safe_load() instead of load(): calling yaml.load() without a Loader is
    # deprecated since PyYAML 5.1 and raises TypeError from PyYAML 6.0 on.
    # The configuration only consists of plain mappings, lists and scalars.
    config = yaml.safe_load(f)

print(repr(config))

# copy original file to working copy
shutil.copyfile(config["general"]["s9y_database"], config["general"]["s9y_database_output"])

DATEFORMAT_IN = "%Y-%m-%d %H:%M:%S %z"
DATEFORMAT_IN_NAIVE = "%Y-%m-%d %H:%M:%S"
DATEFORMAT_OUT = "%Y-%m-%d %H:%M:%S"

LOCAL_TIMEZONE = pytz.timezone(config["general"]["timezone"])


def _parse_stamp(value):
    """Return *value* as an aware datetime expressed in LOCAL_TIMEZONE.

    Stamps carrying a UTC offset (DATEFORMAT_IN) are converted to the local
    zone. Stamps without an offset (DATEFORMAT_IN_NAIVE) are interpreted as
    local wall-clock time.

    Raises:
        ValueError: if *value* matches neither format.
    """
    try:
        stamp = datetime.strptime(value, DATEFORMAT_IN)
    except ValueError:
        # No offset present: attach the configured zone via pytz's localize().
        return LOCAL_TIMEZONE.localize(datetime.strptime(value, DATEFORMAT_IN_NAIVE))
    return stamp.astimezone(LOCAL_TIMEZONE)


# MAIN SCRIPT
jk = jekyllreader.JekyllReader(config["general"]["jekyll_dir"])
s9y = s9ywriter.S9YWriter(config["general"]["s9y_database_output"])
for i in range(jk.len()):
    print(f"Item: {i}")
    jk_article = jk.get(i)

    # Start from the configured defaults, then fill in per-article values.
    new_entry = s9ywriter.S9YEntry()
    for k, v in config["s9y_defaults"].items():
        setattr(new_entry, k, v)

    new_entry.title = jk_article.metadata["title"]
    date_created = _parse_stamp(jk_article.metadata["created"])
    date_updated = _parse_stamp(jk_article.metadata["updated"])
    new_entry.timestamp = int(date_created.timestamp())
    new_entry.last_modified = int(date_updated.timestamp())

    # Handle images: files go to <media dir>/<local year of creation>/
    img_target_dir = config["general"]["s9y_media_dir"] + "/" + str(date_created.year) + "/"
    # Fetch the image list before replace_imagepaths() is called, as the
    # original script did.
    img_files = jk_article.images
    print(repr(img_files))
    jk_article.replace_imagepaths("/" + img_target_dir)
    for img in img_files:
        img_name = basename(img)
        # Created lazily so that no directory appears for image-less articles.
        makedirs(img_target_dir, exist_ok=True)
        shutil.copyfile(img, img_target_dir + img_name)

    # Handle body: Split into body+extended if possible. Everything up to the
    # first blank line becomes the body, the remainder the extended part.
    content = jk_article.body.replace("\r", "")
    teaser, blank_line, remainder = content.partition("\n\n")
    if blank_line:
        new_entry.body = teaser
        new_entry.extended = remainder.strip("\n")
    else:
        new_entry.body = content

    # Handle tags/categories and other metadata
    for t in jk_article.metadata["tags"]:
        if t in config["categories"]:
            new_entry.categories.append(t)
        else:
            new_entry.tags.append(t)

    # Mark non-English articles in the title.
    if "language" in jk_article.metadata and jk_article.metadata["language"] != "en":
        if jk_article.metadata["language"] == "de":
            new_entry.title += " 🇩🇪"
        else:
            new_entry.title += " (" + jk_article.metadata["language"] + ")"

    s9y.add_entry(new_entry)
s9y.commit()
b/jekyllreader/jekyllarticle.py new file mode 100644 index 0000000..8610374 --- /dev/null +++ b/jekyllreader/jekyllarticle.py @@ -0,0 +1,69 @@ +from functools import partial +from os.path import basename +import re +import frontmatter + + +class JekyllArticle(): + RE_IMG = r'!\[(.*?)\]\((.+?)( [\'"].+[\'"])?\)' + RE_HTML_IMG = r' ue, etc. + # WORKAROUND: Edit your blog settings and change the permalink to let S9Y regenerate them + # then change back to desired value + + for category in list(set(entry.categories)): + self.add_category(entry_id, category) + + for tag in list(set(entry.tags)): + self.add_tag(entry_id, tag) + + def commit(self): + self.db.commit() + + def add_category(self, entry_id: int, category_name: str): + # Tables: category, entrycat, access + # Category: categoryid, category_name, "", "", 0, 0, 0, parentid, NULL, NULL + # entrycat: entryid, categoryid + print(f"{entry_id} - {category_name}") + sql = "SELECT categoryid FROM category WHERE category_name = ?" + result = self.db.execute(sql, [category_name]) + cat = result.fetchone() + if cat: + category_id = cat[0] + else: + # Category does not yet exist, add it + sql = "INSERT INTO category (category_name) VALUES (?)" + result = self.db.execute(sql, [category_name]) + category_id = result.lastrowid + # Add access permissions + # access: 0, category_id, "category", read + # access: 0, category_id, "category", write + sql = "INSERT INTO access VALUES (?, ?, ?, ?, ?)" + self.db.execute(sql, [0, category_id, "category", "read", ""]) + self.db.execute(sql, [0, category_id, "category", "write", ""]) + # Add permalink + sql = "INSERT INTO permalinks VALUES (?, ?, ?, ?)" + self.db.execute(sql, [f"categories/{category_name}", category_id, "category", None]) + + sql = "INSERT INTO entrycat VALUES (?, ?)" + self.db.execute(sql, [entry_id, category_id]) + + def add_tag(self, entry_id: int, tag: str): + # Table: entrytags (entryid, tag) + sql = "INSERT INTO entrytags VALUES (?, ?)" + self.db.execute(sql, 
(entry_id, tag))