Initial commit.
This commit is contained in:
commit
251a0d00a3
42
README.md
Normal file
42
README.md
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
wallabag2GoodLinks converter
|
||||||
|
============================
|
||||||
|
|
||||||
|
Converts a [wallabag](https://www.wallabag.org/) JSON export to [GoodLinks](https://goodlinks.app/) format.
|
||||||
|
|
||||||
|
|
||||||
|
Wallabag format
|
||||||
|
---------------
|
||||||
|
|
||||||
|
* `is_archived` (0/1)
|
||||||
|
* `is_starred` (0/1)
|
||||||
|
* `tags` (List)
|
||||||
|
* `is_public` (true/false)
|
||||||
|
* `id`
|
||||||
|
* `title`
|
||||||
|
* `url`
|
||||||
|
* `given_url`
|
||||||
|
* `content` (HTML)
|
||||||
|
* `created_at` (yyyy-mm-ddThh:mm:ss+hh:mm)
|
||||||
|
* `updated_at`
|
||||||
|
* `published_at`
|
||||||
|
* `published_by` (List)
|
||||||
|
* `annotations` (List)
|
||||||
|
* `mimetype` (text/html)
|
||||||
|
* `language` (en)
|
||||||
|
* `reading_time` (Int)
|
||||||
|
* `domain_name`
|
||||||
|
* `preview_picture` (URL)
|
||||||
|
* `http_status` ("200")
|
||||||
|
* `headers` (Object)
|
||||||
|
|
||||||
|
|
||||||
|
GoodLinks format
|
||||||
|
----------------
|
||||||
|
|
||||||
|
* `readAt` (Unixtime)
|
||||||
|
* `addedAt` (Unixtime)
|
||||||
|
* `summary`
|
||||||
|
* `starred` (true/false)
|
||||||
|
* `title`
|
||||||
|
* `tags` (List)
|
||||||
|
* `url`
|
37
walla2goodlinks.py
Executable file
37
walla2goodlinks.py
Executable file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import html
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
CLEANR = re.compile(r"<.*?>")

# File names are fixed; the script reads and writes in the current directory.
INPUT_PATH = "Wallabag All articles.json"
OUTPUT_PATH = "walla2goodlinks.json"

# Tag added to every converted record so imported links can be told apart.
IMPORT_TAG = "+IMPORTED"

# Upper bound on the length of the plain-text summary.
SUMMARY_MAX_CHARS = 199


def _plain_text(content):
    """Return *content* (an HTML string) as single-line plain text.

    Tags are removed BEFORE entities are decoded. Decoding first would turn
    escaped literal text such as ``&lt;b&gt;`` into ``<b>``, which the tag
    regex would then delete as if it were markup.

    A missing or empty *content* yields an empty string.
    """
    if not content:
        return ""
    # "." in CLEANR does not match newlines, so flatten them first; otherwise
    # a tag that spans several lines would survive.
    flattened = content.replace("\n", " ")
    without_tags = CLEANR.sub("", flattened)
    # Entities such as "&#10;" decode to a newline; flatten again so the
    # result stays on a single line.
    return html.unescape(without_tags).replace("\n", " ")


def convert_record(rec):
    """Convert one wallabag export record into a GoodLinks record.

    Reads the ``created_at``, ``content``, ``tags``, ``is_starred``,
    ``title`` and ``url`` keys of *rec* and returns a new dict with the keys
    ``readAt``, ``addedAt``, ``summary``, ``starred``, ``title``, ``tags``
    and ``url``. *rec* itself is not modified.

    Raises:
        KeyError: if ``created_at``, ``is_starred``, ``title`` or ``url`` is
            missing from *rec*.
        ValueError: if ``created_at`` is not an ISO 8601 string accepted by
            ``datetime.fromisoformat``.
    """
    time_added = datetime.fromisoformat(rec["created_at"])
    # NOTE(review): every record gets readAt == addedAt regardless of
    # ``is_archived``; confirm this is how GoodLinks should see unread items.
    time_read = time_added

    # Copy the list so the caller's record is not mutated by the append.
    tags = list(rec.get("tags") or [])
    tags.append(IMPORT_TAG)

    return {
        "readAt": time_read.timestamp(),
        "addedAt": time_added.timestamp(),
        "summary": _plain_text(rec.get("content"))[:SUMMARY_MAX_CHARS],
        "starred": (rec["is_starred"] == 1),
        "title": rec["title"],
        "tags": tags,
        "url": rec["url"],
    }


def main():
    """Read the wallabag export, convert every record, write the result.

    Each source record and its converted form are echoed to stdout.
    """
    # Be explicit about the encoding instead of relying on the platform
    # default, which is not UTF-8 everywhere.
    with open(INPUT_PATH, "rt", encoding="utf-8") as f:
        json_obj = json.load(f)

    output_obj = []
    for rec in json_obj:
        new_obj = convert_record(rec)
        print(repr(rec))
        print(repr(new_obj))
        output_obj.append(new_obj)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(output_obj, f)


if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user