70 lines
2.8 KiB
Python
70 lines
2.8 KiB
Python
from functools import partial
|
|
from os.path import basename
|
|
import re
|
|
import frontmatter
|
|
|
|
|
|
class JekyllArticle:
    """A Jekyll Markdown article: front matter, body text and referenced images.

    On construction the file is parsed (front matter split from the body,
    Liquid ``highlight`` tags converted to fenced code blocks) and every
    image referenced in the body is collected into ``images``.

    Attributes:
        file_path: Path of the Markdown file passed to the constructor.
        base_dir: Value substituted for ``{{ site.url }}`` in image paths.
        body: Article text without the front matter.
        metadata: Front-matter mapping as returned by ``frontmatter``.
        images: Unique image paths found in the body, in order of first
            appearance, with ``{{ site.url }}`` already resolved.
        replace_image_newpath: Unused by this class; kept so existing
            callers that read or set it keep working.
    """

    # Markdown image: ![alt](path "optional title").
    # Groups: 1 = alt text, 2 = path, 3 = title including its leading space
    # and quotes. The title part is matched lazily so that two titled images
    # on the same line are not merged into a single match.
    RE_IMG = r'!\[(.*?)\]\((.+?)( [\'"].+?[\'"])?\)'
    # Opening part of an HTML image tag; group 1 = value of src.
    RE_HTML_IMG = r'<img src="(.*?)"'
    # Liquid placeholder {{ site.url }}, with or without inner spaces.
    RE_SITE_URL = r'\{\{ ?site\.url ?\}\}'

    def __init__(self, md_file: str, base_dir: str = "."):
        """Load ``md_file`` and collect the images it references.

        Args:
            md_file: Path of the Markdown file to load.
            base_dir: Replacement for ``{{ site.url }}`` in image paths.
        """
        self.file_path = md_file
        self.base_dir = base_dir
        self.body = ""
        self.metadata = {}
        self.images = []
        self.replace_image_newpath = ""
        self.parse_file()
        self.collect_images()

    def parse_file(self):
        """Read the file, split off the front matter and normalise code blocks."""
        article = frontmatter.load(self.file_path)
        self.metadata = article.metadata
        self.body = article.content

        # Convert Liquid highlight tags to Markdown fenced code blocks.
        self.body = re.sub(r'\{% highlight( (\S+)) %\}', r'```\2', self.body)
        self.body = re.sub(r'\{% endhighlight %\}', r'```', self.body)

    def _resolve_site_url(self, path: str) -> str:
        """Return ``path`` with every ``{{ site.url }}`` replaced by ``base_dir``."""
        # A callable replacement inserts base_dir literally; passing the
        # string itself would make re.sub interpret backslashes in it
        # (e.g. a Windows path) as escape sequences.
        return re.sub(self.RE_SITE_URL, lambda _match: self.base_dir, path)

    def _relocate(self, old_path: str, new_base_url: str) -> str:
        """Return ``new_base_url`` directly followed by the file name of ``old_path``.

        No separator is inserted, so ``new_base_url`` must already end with
        one if a separator is wanted.
        """
        file_name = basename(self._resolve_site_url(old_path))
        return "{}{}".format(new_base_url, file_name)

    def collect_images(self):
        """Append all image paths found in ``body`` to ``images``.

        Markdown images are collected first, then HTML ``<img src="...">``
        tags. Duplicates are removed while keeping the order of first
        appearance.
        """
        # With several groups findall yields tuples; index 1 is the path.
        markdown_paths = [found[1] for found in re.findall(self.RE_IMG, self.body)]
        # With a single group findall yields the plain src strings.
        html_paths = re.findall(self.RE_HTML_IMG, self.body)

        for path in markdown_paths + html_paths:
            self.images.append(self._resolve_site_url(path))

        # dict.fromkeys de-duplicates like set() but keeps a stable order.
        self.images = list(dict.fromkeys(self.images))

    def _replace_single_imagepath(self, match, new_base_url: str = ""):
        """Build the replacement text for one Markdown image match.

        Args:
            match: Match object produced by ``RE_IMG``.
            new_base_url: Prefix put in front of the image file name.

        Returns:
            The image markup with its path rewritten; alt text and the
            optional title are kept unchanged.
        """
        new_imgfile = self._relocate(match.group(2), new_base_url)
        # Group 3 is None when the image has no title.
        img_title = match.group(3) or ""
        return "![{}]({}{})".format(match.group(1), new_imgfile, img_title)

    def _replace_single_htmlimagepath(self, match, new_base_url: str = ""):
        """Build the replacement text for one HTML ``<img src="...">`` match.

        Args:
            match: Match object produced by ``RE_HTML_IMG``.
            new_base_url: Prefix put in front of the image file name.

        Returns:
            The opening ``<img src="..."`` fragment with the rewritten path.
        """
        new_imgfile = self._relocate(match.group(1), new_base_url)
        return "<img src=\"" + new_imgfile + "\""

    def replace_imagepaths(self, new_base_url: str = ""):
        """Rewrite every image path in ``body`` to live under ``new_base_url``.

        Only the file name of each image is kept, for example with
        ``new_base_url="/media/"``::

            old: ![Alt]({{ site.url }}/assets/pic.png "Title")
            new: ![Alt](/media/pic.png "Title")

        ``images`` is not updated by this method.

        Args:
            new_base_url: Prefix for the new image paths, including any
                trailing separator.
        """
        self.body = re.sub(
            self.RE_IMG,
            partial(self._replace_single_imagepath, new_base_url=new_base_url),
            self.body,
        )
        self.body = re.sub(
            self.RE_HTML_IMG,
            partial(self._replace_single_htmlimagepath, new_base_url=new_base_url),
            self.body,
        )
|