Initial commit.

2022-10-01 20:49:57 +02:00 · 2022-10-01 20:49:57 +02:00 · 251a0d00a3
commit 251a0d00a3
2 changed files with 79 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,42 @@
 wallabag2GoodLinks converter
 ============================
 Converts a [wallabag](https://www.wallabag.org/) JSON export to [GoodLinks](https://goodlinks.app/) format.
 Wallabag format
 ---------------
 * `is_archived` (0/1)
 * `is_starred` (0/1)
 * `tags` (List)
 * `is_public` (true/false)
 * `id`
 * `title`
 * `url`
 * `given_url`
 * `content` (HTML)
 * `created_at` (yyyy-mm-ddThh:mm:ss+hh:mm)
 * `updated_at`
 * `published_at`
 * `published_by` (List)
 * `annotations` (List)
 * `mimetype` (text/html)
 * `language` (en)
 * `reading_time` (Int)
 * `domain_name`
 * `preview_picture` (URL)
 * `http_status` ("200")
 * `headers` (Object)
 GoodLinks format
 ----------------
 * `readAt` (Unix time in seconds)
 * `addedAt` (Unix time in seconds)
 * `summary`
 * `starred` (true/false)
 * `title`
 * `tags` (List)
 * `url`
--- a/walla2goodlinks.py
+++ b/walla2goodlinks.py
@ -0,0 +1,37 @@
 #!/usr/bin/env python3
 import html
 import json
 import re
 from datetime import datetime
 # Tag-stripping regex, after
 # https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
 # DOTALL lets a tag that spans several lines still match as one tag.
 CLEANR = re.compile(r"<.*?>", re.DOTALL)

 DEFAULT_INPUT = "Wallabag All articles.json"
 DEFAULT_OUTPUT = "walla2goodlinks.json"
 IMPORT_TAG = "+IMPORTED"
 SUMMARY_MAX_CHARS = 199


 def strip_html(content):
     """Return *content* as single-line plain text.

     Tags are removed BEFORE entities are decoded. Decoding first would turn
     escaped text such as ``1 &lt; 2 and 3 &gt; 2`` into ``1 < 2 and 3 > 2``
     and the tag regex would then delete ``< 2 and 3 >`` as if it were markup.

     Args:
         content: HTML string, or ``None`` (treated as empty).

     Returns:
         The text with tags removed, entities decoded and newlines replaced
         by spaces.
     """
     if not content:
         return ""
     text = CLEANR.sub("", content)
     text = html.unescape(text)
     return text.replace("\n", " ")


 def convert_record(rec: dict) -> dict:
     """Convert one wallabag article record into a GoodLinks record.

     Reads the ``created_at``, ``content``, ``tags``, ``is_starred``,
     ``title`` and ``url`` keys of *rec*. *rec* itself is not modified.

     Args:
         rec: One element of the wallabag JSON export.

     Returns:
         A dict with the keys ``readAt``, ``addedAt``, ``summary``,
         ``starred``, ``title``, ``tags`` and ``url``.

     Raises:
         KeyError: If ``created_at``, ``is_starred``, ``title`` or ``url``
             is missing from *rec*.
         ValueError: If ``created_at`` is not an ISO 8601 timestamp.
     """
     time_added = datetime.fromisoformat(rec["created_at"])
     # NOTE(review): every item gets a readAt equal to its creation time, even
     # when wallabag's is_archived is 0 - confirm this is what GoodLinks
     # should see for unread items.
     time_read = time_added
     # Build a new list so the caller's record keeps its original tags.
     tags = list(rec.get("tags") or [])
     tags.append(IMPORT_TAG)
     return {
         "readAt": time_read.timestamp(),
         "addedAt": time_added.timestamp(),
         "summary": strip_html(rec.get("content"))[:SUMMARY_MAX_CHARS],
         "starred": (rec["is_starred"] == 1),
         "title": rec["title"],
         "tags": tags,
         "url": rec["url"],
     }


 def main(src: str = DEFAULT_INPUT, dst: str = DEFAULT_OUTPUT) -> None:
     """Convert the wallabag export at *src* and write the result to *dst*.

     Each source record and its converted form are echoed to stdout, as the
     script has always done.

     Args:
         src: Path of the wallabag JSON export (a JSON list of records).
         dst: Path of the GoodLinks JSON file to create or overwrite.
     """
     # Read explicitly as UTF-8 instead of relying on the platform default.
     with open(src, "rt", encoding="utf-8") as f:
         json_obj = json.load(f)
     output_obj = []
     for rec in json_obj:
         new_obj = convert_record(rec)
         print(repr(rec))
         print(repr(new_obj))
         output_obj.append(new_obj)
     with open(dst, "w", encoding="utf-8") as f:
         json.dump(output_obj, f)


 if __name__ == "__main__":
     main()