commit d774717817cad58f48d59a45475999425da17d44 Author: Markus Birth Date: Thu Jan 20 03:58:43 2022 +0100 Initial commit. diff --git a/README.md b/README.md new file mode 100644 index 0000000..20c3784 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +disqus2s9y +========== + +DISQUS to Serendipity importer. + + +Usage +----- + +Fetch all the comments from your site using the Disqus API. Basically go +into your admin area, go to the "Moderate" section and check the HTTP +requests. + +You should find something going to `https://disqus.com/api/3.0/posts/list`. +It will return a JSON structure. In the section `cursor`, there's a value +`hasNext`. If that's `true`, do the request again but add the parameter +`cursor` with the value from the `next` key to it. This will get you the +next bunch of comments. Rinse and repeat until you have everything. + +Now copy all the files into the directory with these scripts and add +their names to the `DISQUS_FILES` variable in the Python scripts. + +Also download your Serendipity SQLite database into the directory as `serendipity.db`. + +Now run `dump_urls_to_csv.py` to create 2 CSV files. One is `disqus2s9y.csv` +which contains all the URLs from your DISQUS dump and an empty column +`s9y_entry_id`. The second file is `s9y_urls.csv` which contains all the +URLs from your Serendipity database. + +The important step is now to match both, i.e. DISQUS-URL to Serendipity +entry_id. Fill in the matching entry_id into the `s9y_entry_id` column. + +After you're done, run `disqus2s9y.py` and it should import all comments +into your `serendipity.db`. Afterwards copy that back to the server and +you're done. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""disqus2s9y.py -- import DISQUS comments into a Serendipity SQLite database.

Reads the hand-edited URL -> entry_id mapping from ``disqus2s9y.csv``, loads
the raw DISQUS API dumps listed in ``DISQUS_FILES``, and inserts every comment
into the ``comments`` table of ``serendipity.db``, preserving reply threading.
"""

import csv
import datetime
import json
import sqlite3
import sys

# Raw JSON responses captured from https://disqus.com/api/3.0/posts/list
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]


def load_mappings(path="disqus2s9y.csv"):
    """Return ``{disqus_url: s9y_entry_id}`` from the mapping CSV.

    Rows whose ``s9y_entry_id`` column is empty are skipped -- those URLs
    were not matched to a Serendipity entry by the user.
    """
    mappings = {}
    with open(path, "rt") as f:
        for row in csv.DictReader(f):
            if not row["s9y_entry_id"]:
                # Skip lines without entry_id
                continue
            mappings[row["disqus_url"]] = row["s9y_entry_id"]
    return mappings


def load_comments(filenames):
    """Concatenate the ``response`` arrays of all DISQUS JSON dump files."""
    comments = []
    for filename in filenames:
        with open(filename, "rt") as f:
            comments += json.load(f)["response"]
    return comments


def insert_dict(db_cursor, table, data):
    """INSERT ``data`` (column -> value mapping) into ``table``.

    Values are bound through ``?`` placeholders, never interpolated into the
    SQL string.  Table and column names come from trusted local code.

    Returns the rowid of the inserted row.
    """
    fields = list(data.keys())
    placeholders = ", ".join("?" for _ in fields)
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(fields), placeholders
    )
    db_cursor.execute(sql, list(data.values()))
    return db_cursor.lastrowid


def sanitise_text(message):
    """Prepare a DISQUS comment body for Serendipity's Markdown plugin."""
    # This is for Markdown as I'm using the Markdown plugin.
    # NOTE(review): the original search strings here were lost in transit
    # (they appeared as empty strings ""); ``replace("", "`")`` would insert
    # a backtick between every single character.  ``<code>`` tags -> backtick
    # is the presumed intent -- TODO confirm against the original file.
    message = message.replace("<code>", "`").replace("</code>", "`")
    # Two trailing spaces force a Markdown hard line break.
    message = message.replace("\n", "  \n")
    return message


def main():
    mappings = load_mappings()
    print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))

    comments = load_comments(DISQUS_FILES)
    print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

    # Parents must be inserted before their replies, so process oldest first.
    print("Sorting comments by timestamp ascending.")
    comments = sorted(comments, key=lambda c: c["createdAt"])

    db = sqlite3.connect("serendipity.db")
    cursor = db.cursor()

    disqus_to_s9y_id = {}  # DISQUS comment id (str) -> new s9y comment rowid
    for c in comments:
        c_url = c["thread"]["link"]
        if str(c_url) not in mappings:
            print(f"ERROR: Can't map {c_url} to Serendipity page. \nCheck disqus2s9y.csv!")
            continue

        parent_id = 0
        if c["parent"]:
            if str(c["parent"]) not in disqus_to_s9y_id:
                print("ERROR: DISQUS Parent ID {} not found.".format(c["parent"]))
                sys.exit(255)
            parent_id = disqus_to_s9y_id[str(c["parent"])]

        author = c["author"]

        created = datetime.datetime.fromisoformat(c["createdAt"])
        if created.tzinfo is None:
            # Disqus API timestamps are UTC but carry no offset; a naive
            # datetime would otherwise be interpreted in the server's local
            # timezone by .timestamp() -- TODO confirm against the API docs.
            created = created.replace(tzinfo=datetime.timezone.utc)

        new_comment = {
            "entry_id": mappings[c_url],
            "parent_id": parent_id,
            "timestamp": int(created.timestamp()),
            "title": "",
            "author": author["name"],
            "email": str(author.get("email", "")),
            "url": str(author.get("url", "")),
            "ip": c["ipAddress"],
            "body": sanitise_text(c["raw_message"]),
            "type": "NORMAL",
            "subscribed": "false",
            "status": "approved",
            "referer": ""
        }

        new_rowid = insert_dict(cursor, "comments", new_comment)
        # BUGFIX: key must match the str() lookup above; storing the raw id
        # made replies unfindable whenever the JSON id wasn't already a str.
        disqus_to_s9y_id[str(c["id"])] = new_rowid
        print("Inserted comment with id {}".format(new_rowid))

    db.commit()
    cursor.close()
    db.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""dump_urls_to_csv.py -- dump DISQUS and Serendipity URLs to CSV for matching.

Writes ``disqus2s9y.csv`` (all unique DISQUS thread URLs plus an empty
``s9y_entry_id`` column to be filled in by hand) and ``s9y_urls.csv`` (all
entry permalinks found in ``serendipity.db``).
"""

# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing comment into SQLite, store new SQLite-ID and Disqus-ID (for threads)

import csv
import json
import sqlite3
from os.path import basename

# Raw JSON responses captured from https://disqus.com/api/3.0/posts/list
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]


def url_slug(url):
    """Return the final path component of ``url`` without its ``.html`` suffix."""
    return basename(url).replace(".html", "")


def collect_urls(comments):
    """Return the sorted unique thread URLs of ``comments``.

    Sorting replaces the original ``list(set(...))`` so the CSV row order is
    deterministic between runs.
    NOTE: ``c["thread"]["identifiers"]`` would be an alternative key source.
    """
    return sorted({c["thread"]["link"] for c in comments})


def main():
    comments = []
    for filename in DISQUS_FILES:
        with open(filename, "rt") as f:
            comments += json.load(f)["response"]

    print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

    old_urls = collect_urls(comments)
    print("Found {} unique URLs.".format(len(old_urls)))

    # csv.writer handles quoting/escaping, so URLs or titles containing a
    # double quote can no longer corrupt the file (the original hand-rolled
    # the quoting with f.write).
    with open("disqus2s9y.csv", "wt", newline="") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(["disqus_url", "disqus_title", "s9y_entry_id"])
        for ou in old_urls:
            writer.writerow([ou, url_slug(ou), ""])

    db = sqlite3.connect("serendipity.db")
    req = db.execute("SELECT permalink, entry_id FROM permalinks WHERE type='entry'")

    with open("s9y_urls.csv", "wt", newline="") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(["s9y_title", "s9y_url", "entry_id"])
        for url, entry_id in req.fetchall():
            writer.writerow([url_slug(url), url, entry_id])

    # The original leaked the connection; close it explicitly.
    db.close()


if __name__ == "__main__":
    main()