#!/usr/bin/env python3
from html.parser import HTMLParser
import xml.etree.ElementTree as ET
from xml.dom import minidom
from pathlib import Path
import json
import re
import os
class HTMLFilter(HTMLParser):
    """HTML parser that keeps only the text content of the fed markup.

    Every chunk of character data reported by the parser is appended to
    ``text`` followed by a newline.
    """

    # Class-level default; the first handle_data() call rebinds ``text`` on
    # the instance, so separate instances do not share accumulated text.
    text = ""

    def handle_data(self, data):
        """Append *data* plus a trailing newline to ``self.text``."""
        self.text = "".join((self.text, data, "\n"))
def get_data(type, path):
    """Read one metadata JSON file and extract description, tags and date.

    Args:
        type: Name of the source site. "bluesky", "twitter", "furaffinity"
            and "pixiv" are recognised; any other value yields an empty
            description and an empty tag list. (The name shadows the builtin
            but is kept so existing callers keep working.)
        path: Path of the JSON metadata file to read.

    Returns:
        A ``(desc, tags, date)`` tuple:

        * ``desc`` -- the "text" (bluesky) or "content" (twitter) value, or
          the "title" followed by a blank line and the "description"
          (furaffinity) or the text of the HTML "caption" (pixiv).
        * ``tags`` -- for bluesky/twitter the words following each ``#`` in
          ``desc``; for furaffinity the "tags" value without its first
          entry; for pixiv the "tags" value unchanged.
        * ``date`` -- the "date" value; for pixiv, "." and the "num" value
          are appended (presumably "date" is a string -- confirm against
          the metadata producer).

    Raises:
        KeyError: If a field needed for the given site is missing.
    """
    # JSON text is UTF-8 by specification, so do not rely on the locale's
    # default encoding when reading it.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)

    desc = ""
    tags = []
    date = data["date"]

    if type == "bluesky":
        desc = data["text"]
    elif type == "twitter":
        desc = data["content"]
    elif type == "furaffinity":
        desc = data["title"] + "\n\n" + data["description"]
        tags = data["tags"]
        if tags:
            # Drop the first tag and keep the remaining ones in order.
            tags = tags[1:]
    elif type == "pixiv":
        caption = HTMLFilter()
        caption.feed(data["caption"])
        # feed() may hold back trailing text (for example when it ends in an
        # unterminated "&" reference); close() forces it through
        # handle_data() so the end of the caption is not lost.
        caption.close()
        desc = data["title"] + "\n\n" + caption.text
        tags = data["tags"]
        date += "." + str(data["num"])

    if type in ("bluesky", "twitter"):
        # These sites carry no tag list here; use the hashtags in the text.
        tags = re.findall(r"#(\w+)", desc)
    return desc, tags, date
# Namespace prefixes declared on the rdf:Description element, in output order.
_XMP_NAMESPACES = {
    "digiKam": "http://www.digikam.org/ns/1.0/",
    "MicrosoftPhoto": "http://ns.microsoft.com/photo/1.0",
    "lr": "http://ns.adobe.com/lightroom/1.0/",
    "mediapro": "http://ns.iview-multimedia.com/mediapro/1.0/",
    "acdsee": "http://ns.acdsee.com/iptc/1.0/",
    "dc": "http://purl.org/dc/elements/1.1/",
    "xmp": "http://ns.adobe.com/xap/1.0/",
    "exif": "http://ns.adobe.com/exif/1.0/",
}


def _text_element(doc, name, text):
    """Return a new element *name* whose only child is the text node *text*."""
    node = doc.createElement(name)
    node.appendChild(doc.createTextNode(text))
    return node


def _seq_element(doc, name, items):
    """Return element *name* wrapping an rdf:Seq with one rdf:li per item."""
    seq = doc.createElement("rdf:Seq")
    for item in items:
        seq.appendChild(_text_element(doc, "rdf:li", item))
    wrapper = doc.createElement(name)
    wrapper.appendChild(seq)
    return wrapper


def _build_xmp(desc, tags, date):
    """Serialise a description, tag list and date as XMP sidecar text.

    Args:
        desc: Text stored in ``dc:description``.
        tags: Sequence of tag strings; when empty, no tag-related elements
            are written.
        date: Text stored in ``exif:DateTimeOriginal``.

    Returns:
        The pretty-printed ``x:xmpmeta`` document without an XML
        declaration, preceded by a single newline.
    """
    doc = minidom.Document()

    xmpmeta = doc.createElement("x:xmpmeta")
    xmpmeta.setAttribute("xmlns:x", "adobe:ns:meta/")
    xmpmeta.setAttribute("x:xmptk", "genxmp.py")
    doc.appendChild(xmpmeta)

    rdf = doc.createElement("rdf:RDF")
    rdf.setAttribute("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    xmpmeta.appendChild(rdf)

    rdf_desc = doc.createElement("rdf:Description")
    rdf_desc.setAttribute("rdf:about", "")
    for prefix, url in _XMP_NAMESPACES.items():
        rdf_desc.setAttribute("xmlns:" + prefix, url)
    rdf.appendChild(rdf_desc)

    rdf_desc.appendChild(_text_element(doc, "dc:description", desc))
    rdf_desc.appendChild(_text_element(doc, "exif:DateTimeOriginal", date))

    if tags:
        # NOTE(review): the exif:ComponentsConfiguration values are derived
        # from the number of tags (1 .. len(tags) - 2, followed by 0). This
        # is kept exactly as it was; the intent is not evident from this
        # file -- confirm whether a fixed sequence was meant.
        components = [str(i) for i in range(1, len(tags) - 1)]
        components.append(str(0))
        rdf_desc.appendChild(
            _seq_element(doc, "exif:ComponentsConfiguration", components)
        )
        rdf_desc.appendChild(_seq_element(doc, "digiKam:TagsList", tags))

    # toprettyxml() begins with an XML declaration line; drop that line and
    # start the output with a newline instead.
    return "\n" + doc.toprettyxml(indent=" ").split("\n", 1)[1]


# Write an .xmp sidecar next to every metadata JSON file below "media" that
# does not have one yet, then report how many were created.
count = 0
for path in Path("media").rglob("*.json"):
    xmp_path = path.with_suffix(".xmp")
    if xmp_path.exists():
        continue
    # The path component right below "media" selects the site handled by
    # get_data(). Path.parts is separator-independent, unlike splitting the
    # string form on "/".
    site = path.parts[1]
    desc, tags, date = get_data(site, path)
    count += 1
    # The document carries no XML declaration, so write it as UTF-8
    # explicitly instead of using the locale's default encoding.
    with open(xmp_path, "w", encoding="utf-8") as f:
        f.write(_build_xmp(desc, tags, date))
print(f"Generated {count} XMP files")