Initial commit.
commit d774717817
README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
disqus2s9y
==========

DISQUS to Serendipity importer.

Usage
-----

Fetch all the comments from your site using the Disqus API. Basically,
go into your admin area, open the "Moderate" section, and check the
HTTP requests.

You should find something going to `https://disqus.com/api/3.0/posts/list`.
It will return a JSON structure. In the `cursor` section, there's a value
`hasNext`. If that's `true`, repeat the request, adding the parameter
`cursor` with the value from the `next` key. This will get you the next
batch of comments. Rinse and repeat until you have everything.
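
If you'd rather script that than click through the admin area, here's a
rough sketch of the same loop (untested; it assumes the third-party
`requests` package, and the `forum` and `api_key` values are placeholders
you'd copy from your own "Moderate" HTTP requests):

```python
import json

import requests

API_URL = "https://disqus.com/api/3.0/posts/list"
params = {
    "forum": "your-forum-shortname",  # placeholder
    "api_key": "YOUR_API_KEY",        # placeholder, copied from the admin requests
    "limit": 100,
}

page = 1
while True:
    data = requests.get(API_URL, params=params).json()
    # Save each page under the names expected by DISQUS_FILES.
    with open("DISQUS{}.json".format(page), "wt") as f:
        json.dump(data, f)
    cursor = data["cursor"]
    if not cursor.get("hasNext"):
        break
    params["cursor"] = cursor["next"]
    page += 1
```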

Now copy all the files into the directory with these scripts and add
their names to the `DISQUS_FILES` variable in the Python scripts.
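
For example, if you ended up with three dump files:

```python
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json", "DISQUS3.json"]
```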

Also download your Serendipity SQLite database into the same directory
as `serendipity.db`.

Now run `dump_urls_to_csv.py` to create two CSV files. One is
`disqus2s9y.csv`, which contains all the URLs from your DISQUS dump and
an empty column `s9y_entry_id`. The second file is `s9y_urls.csv`, which
contains all the URLs from your Serendipity database.

The important step is now to match the two, i.e. map each DISQUS URL to
its Serendipity entry_id. Fill the matching entry_id into the
`s9y_entry_id` column.
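
A filled-in row in `disqus2s9y.csv` would then look something like this
(the URL and id here are made up):

```
"disqus_url","disqus_title","s9y_entry_id"
"https://example.com/2015/04/some-old-post.html","some-old-post",42
```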

After you're done, run `disqus2s9y.py` and it should import all the
comments into your `serendipity.db`. Afterwards, copy that database back
to the server and you're done.
disqus2s9y.py (new executable file, 102 lines)
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import csv
import datetime
import json
import sqlite3
import sys
from pprint import pprint

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]


# Read the URL -> entry_id mapping that was filled in by hand.
mappings = {}
with open("disqus2s9y.csv", "rt") as f:
    for row in csv.DictReader(f):
        if not row["s9y_entry_id"]:
            # Skip lines without entry_id
            continue
        mappings[row["disqus_url"]] = row["s9y_entry_id"]

print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))

comments = []
for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Sort oldest-first, so parent comments are inserted before their replies.
print("Sorting comments by timestamp ascending.")
comments = sorted(comments, key=lambda c: c["createdAt"])

db = sqlite3.connect("serendipity.db")
cursor = db.cursor()


def insert_dict(db_cursor, table, data):
    # Build a parameterised INSERT from the dict's keys and values.
    fields = []
    placeholders = []
    values = []
    for k, v in data.items():
        fields.append(k)
        placeholders.append("?")
        values.append(v)
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(fields), ", ".join(placeholders))
    db_cursor.execute(sql, values)
    return db_cursor.lastrowid


def sanitise_text(message):
    # This is for Markdown as I'm using the Markdown plugin
    message = message.replace("<code>", "`").replace("</code>", "`")
    # Two trailing spaces force a Markdown line break.
    message = message.replace("\n", "  \n")
    return message


# Maps DISQUS comment IDs to the rowids of the inserted s9y comments,
# so replies can point at the right parent.
disqus_to_s9y_id = {}
for c in comments:
    c_url = c["thread"]["link"]
    if str(c_url) not in mappings:
        print(f"ERROR: Can't map {c_url} to Serendipity page. Check disqus2s9y.csv!")
        continue
    parent_id = 0
    if c["parent"]:
        if str(c["parent"]) not in disqus_to_s9y_id:
            print("ERROR: DISQUS Parent ID {} not found.".format(c["parent"]))
            sys.exit(255)
        parent_id = disqus_to_s9y_id[str(c["parent"])]

    author_email = ""
    if "email" in c["author"]:
        author_email = str(c["author"]["email"])

    author_url = ""
    if "url" in c["author"]:
        author_url = str(c["author"]["url"])

    new_comment = {
        "entry_id": mappings[c_url],
        "parent_id": parent_id,
        "timestamp": int(datetime.datetime.fromisoformat(c["createdAt"]).timestamp()),
        "title": "",
        "author": c["author"]["name"],
        "email": author_email,
        "url": author_url,
        "ip": c["ipAddress"],
        "body": sanitise_text(c["raw_message"]),
        "type": "NORMAL",
        "subscribed": "false",
        "status": "approved",
        "referer": ""
    }

    new_rowid = insert_dict(cursor, "comments", new_comment)
    disqus_to_s9y_id[c["id"]] = new_rowid
    print("Inserted comment with id {}".format(new_rowid))

cursor.close()
db.commit()
db.close()
dump_urls_to_csv.py (new executable file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import sqlite3
from os.path import basename

# Plan:
# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing a comment into SQLite, store the new SQLite ID and the
#    DISQUS ID (for threads)

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

comments = []

for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Collect the thread URL of every comment and de-duplicate.
old_urls = []
for c in comments:
    # old_urls += c["thread"]["identifiers"]
    old_urls.append(c["thread"]["link"])

old_urls = list(set(old_urls))

print("Found {} unique URLs.".format(len(old_urls)))

# Write the DISQUS URLs with an empty s9y_entry_id column to fill in by hand.
with open("disqus2s9y.csv", "wt") as f:
    f.write("\"disqus_url\",\"disqus_title\",\"s9y_entry_id\"\n")
    for ou in old_urls:
        old_name = basename(ou).replace(".html", "")
        f.write("\"{}\",\"{}\",\n".format(ou, old_name))


# Dump all Serendipity entry permalinks as a matching aid.
db = sqlite3.connect("serendipity.db")
req = db.execute("SELECT permalink, entry_id FROM permalinks WHERE type='entry'")
response = req.fetchall()

with open("s9y_urls.csv", "wt") as f:
    f.write("\"s9y_title\",\"s9y_url\",\"entry_id\"\n")
    for r in response:
        (url, entry_id) = r
        name = basename(url).replace(".html", "")
        f.write("\"{}\",\"{}\",{}\n".format(name, url, entry_id))

db.close()