#!/usr/bin/env python3
# Origin: initial commit 251a0d00a365a926ff4060d38cc52b7e22355e5e
# (Markus Birth, Sat Oct 1 20:49:57 2022 +0200), file walla2goodlinks.py.
"""wallabag2GoodLinks converter.

Converts a [wallabag](https://www.wallabag.org/) JSON export to
[GoodLinks](https://goodlinks.app/) format.

Run as a script, it reads ``Wallabag All articles.json`` from the current
directory and writes ``walla2goodlinks.json`` next to it.

Wallabag format
---------------

* `is_archived` (0/1)
* `is_starred` (0/1)
* `tags` (List)
* `is_public` (true/false)
* `id`
* `title`
* `url`
* `given_url`
* `content` (HTML)
* `created_at` (yyyy-mm-ddThh:mm:ss+hh:mm)
* `updated_at`
* `published_at`
* `published_by` (List)
* `annotations` (List)
* `mimetype` (text/html)
* `language` (en)
* `reading_time` (Int)
* `domain_name`
* `preview_picture` (URL)
* `http_status` ("200")
* `headers` (Object)

GoodLinks format
----------------

* `readAt` (Unixtime)
* `addedAt`
* `summary`
* `starred` (true/false)
* `title`
* `tags` (List)
* `url`
"""

import html
import json
import re
from datetime import datetime

# https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
CLEANR = re.compile(r"<.*?>")

INPUT_FILE = "Wallabag All articles.json"
OUTPUT_FILE = "walla2goodlinks.json"
IMPORT_TAG = "+IMPORTED"
SUMMARY_LENGTH = 199


def summarize(content, limit=SUMMARY_LENGTH):
    """Return a plain-text summary of an HTML fragment.

    Args:
        content: HTML source of the article; ``None`` or an empty string
            yields an empty summary instead of raising.
        limit: Maximum number of characters to keep.

    Returns:
        The text with tags removed, entities decoded and newlines replaced
        by spaces, truncated to ``limit`` characters.
    """
    if not content:
        return ""
    # CLEANR's "." does not match newlines, so flatten them first; otherwise
    # a tag that spans several lines would survive the substitution.
    text = content.replace("\n", " ")
    text = CLEANR.sub("", text)
    # Decode entities only AFTER the tags are gone. Decoding first turns
    # escaped text such as "&lt;div&gt;" into "<div>", which the tag regex
    # would then delete from the summary.
    text = html.unescape(text)
    # Entities like "&#10;" decode to a newline; keep the summary one line.
    text = text.replace("\n", " ")
    return text[:limit]


def convert_record(rec):
    """Build one GoodLinks entry from one wallabag entry.

    Args:
        rec: A dict from the wallabag export. Reads ``created_at``,
            ``tags``, ``is_starred``, ``title`` and ``url`` (required) and
            ``content`` (optional).

    Returns:
        A dict with the keys ``readAt``, ``addedAt``, ``summary``,
        ``starred``, ``title``, ``tags`` and ``url``.

    Raises:
        KeyError: If a required key is missing from ``rec``.
        ValueError: If ``created_at`` is not an ISO 8601 timestamp.
    """
    time_added = datetime.fromisoformat(rec["created_at"])
    # NOTE(review): every entry gets readAt == addedAt; the wallabag
    # ``is_archived`` flag is not consulted. Confirm this is intended.
    time_read = time_added
    # Build a new list so the caller's record is not modified.
    tags = [*rec["tags"], IMPORT_TAG]
    return {
        "readAt": time_read.timestamp(),
        "addedAt": time_added.timestamp(),
        "summary": summarize(rec.get("content")),
        "starred": (rec["is_starred"] == 1),
        "title": rec["title"],
        "tags": tags,
        "url": rec["url"],
    }


def main():
    """Convert ``INPUT_FILE`` to ``OUTPUT_FILE``, echoing each record pair."""
    # JSON exchanged between systems is UTF-8; do not rely on the locale.
    with open(INPUT_FILE, "rt", encoding="utf-8") as f:
        json_obj = json.load(f)

    output_obj = []
    for rec in json_obj:
        new_obj = convert_record(rec)
        print(repr(rec))
        print(repr(new_obj))
        output_obj.append(new_obj)

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output_obj, f)


if __name__ == "__main__":
    main()