Initial commit.
commit d774717817
README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
disqus2s9y
==========

DISQUS to Serendipity importer.

Usage
-----

Fetch all the comments from your site using the Disqus API. Basically,
go into your admin area, open the "Moderate" section, and check the
HTTP requests.

You should find something going to `https://disqus.com/api/3.0/posts/list`.
It will return a JSON structure. In the `cursor` section, there's a value
`hasNext`. If that's `true`, repeat the request, adding the parameter
`cursor` with the value from the `next` key. This will get you the next
batch of comments. Rinse and repeat until you have everything.
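
If you'd rather script that than click through the admin area, here's a
rough sketch of the same loop (untested; it assumes the third-party
`requests` package, and the `forum` and `api_key` values are placeholders
you'd copy from your own "Moderate" HTTP requests):

```python
import json

import requests

API_URL = "https://disqus.com/api/3.0/posts/list"
params = {
    "forum": "your-forum-shortname",  # placeholder
    "api_key": "YOUR_API_KEY",        # placeholder, copied from the admin requests
    "limit": 100,
}

page = 1
while True:
    data = requests.get(API_URL, params=params).json()
    # Save each page under the names expected by DISQUS_FILES.
    with open("DISQUS{}.json".format(page), "wt") as f:
        json.dump(data, f)
    cursor = data["cursor"]
    if not cursor.get("hasNext"):
        break
    params["cursor"] = cursor["next"]
    page += 1
```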

Now copy all the files into the directory with these scripts and add
their names to the `DISQUS_FILES` variable in the Python scripts.
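
For example, if you ended up with three dump files:

```python
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json", "DISQUS3.json"]
```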

Also download your Serendipity SQLite database into the same directory
as `serendipity.db`.

Now run `dump_urls_to_csv.py` to create two CSV files. One is
`disqus2s9y.csv`, which contains all the URLs from your DISQUS dump and
an empty column `s9y_entry_id`. The second file is `s9y_urls.csv`, which
contains all the URLs from your Serendipity database.

The important step is now to match the two, i.e. map each DISQUS URL to
its Serendipity entry_id. Fill the matching entry_id into the
`s9y_entry_id` column.
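
A filled-in row in `disqus2s9y.csv` would then look something like this
(the URL and id here are made up):

```
"disqus_url","disqus_title","s9y_entry_id"
"https://example.com/2015/04/some-old-post.html","some-old-post",42
```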

After you're done, run `disqus2s9y.py` and it should import all the
comments into your `serendipity.db`. Afterwards, copy that database back
to the server and you're done.
disqus2s9y.py (new executable file, 102 lines)
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import csv
import datetime
import json
import sqlite3
import sys
from pprint import pprint

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]


# Read the URL -> entry_id mapping that was filled in by hand.
mappings = {}
with open("disqus2s9y.csv", "rt") as f:
    for row in csv.DictReader(f):
        if not row["s9y_entry_id"]:
            # Skip lines without entry_id
            continue
        mappings[row["disqus_url"]] = row["s9y_entry_id"]

print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))

comments = []
for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Sort oldest-first, so parent comments are inserted before their replies.
print("Sorting comments by timestamp ascending.")
comments = sorted(comments, key=lambda c: c["createdAt"])

db = sqlite3.connect("serendipity.db")
cursor = db.cursor()


def insert_dict(db_cursor, table, data):
    # Build a parameterised INSERT from the dict's keys and values.
    fields = []
    placeholders = []
    values = []
    for k, v in data.items():
        fields.append(k)
        placeholders.append("?")
        values.append(v)
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(fields), ", ".join(placeholders))
    db_cursor.execute(sql, values)
    return db_cursor.lastrowid


def sanitise_text(message):
    # This is for Markdown as I'm using the Markdown plugin
    message = message.replace("<code>", "`").replace("</code>", "`")
    # Two trailing spaces force a Markdown line break.
    message = message.replace("\n", "  \n")
    return message


# Maps DISQUS comment IDs to the rowids of the inserted s9y comments,
# so replies can point at the right parent.
disqus_to_s9y_id = {}
for c in comments:
    c_url = c["thread"]["link"]
    if str(c_url) not in mappings:
        print(f"ERROR: Can't map {c_url} to Serendipity page. Check disqus2s9y.csv!")
        continue
    parent_id = 0
    if c["parent"]:
        if str(c["parent"]) not in disqus_to_s9y_id:
            print("ERROR: DISQUS Parent ID {} not found.".format(c["parent"]))
            sys.exit(255)
        parent_id = disqus_to_s9y_id[str(c["parent"])]

    author_email = ""
    if "email" in c["author"]:
        author_email = str(c["author"]["email"])

    author_url = ""
    if "url" in c["author"]:
        author_url = str(c["author"]["url"])

    new_comment = {
        "entry_id": mappings[c_url],
        "parent_id": parent_id,
        "timestamp": int(datetime.datetime.fromisoformat(c["createdAt"]).timestamp()),
        "title": "",
        "author": c["author"]["name"],
        "email": author_email,
        "url": author_url,
        "ip": c["ipAddress"],
        "body": sanitise_text(c["raw_message"]),
        "type": "NORMAL",
        "subscribed": "false",
        "status": "approved",
        "referer": ""
    }

    new_rowid = insert_dict(cursor, "comments", new_comment)
    disqus_to_s9y_id[c["id"]] = new_rowid
    print("Inserted comment with id {}".format(new_rowid))

cursor.close()
db.commit()
db.close()
dump_urls_to_csv.py (new executable file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import sqlite3
from os.path import basename

# Plan:
# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing a comment into SQLite, store the new SQLite ID and the
#    DISQUS ID (for threads)

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

comments = []

for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Collect the thread URL of every comment and de-duplicate.
old_urls = []
for c in comments:
    # old_urls += c["thread"]["identifiers"]
    old_urls.append(c["thread"]["link"])

old_urls = list(set(old_urls))

print("Found {} unique URLs.".format(len(old_urls)))

# Write the DISQUS URLs with an empty s9y_entry_id column to fill in by hand.
with open("disqus2s9y.csv", "wt") as f:
    f.write("\"disqus_url\",\"disqus_title\",\"s9y_entry_id\"\n")
    for ou in old_urls:
        old_name = basename(ou).replace(".html", "")
        f.write("\"{}\",\"{}\",\n".format(ou, old_name))


# Dump all Serendipity entry permalinks as a matching aid.
db = sqlite3.connect("serendipity.db")
req = db.execute("SELECT permalink, entry_id FROM permalinks WHERE type='entry'")
response = req.fetchall()

with open("s9y_urls.csv", "wt") as f:
    f.write("\"s9y_title\",\"s9y_url\",\"entry_id\"\n")
    for r in response:
        (url, entry_id) = r
        name = basename(url).replace(".html", "")
        f.write("\"{}\",\"{}\",{}\n".format(name, url, entry_id))

db.close()