#!/usr/bin/env python3
"""Generate XMP sidecar files from JSON metadata files.

Recursively scans the ``media`` directory for ``*.json`` files. For each one
that has no ``.xmp`` file next to it, the metadata is read with
:func:`get_data` (the second path component selects the site type) and an
XMP document holding the description, date and tags is written.
"""
from html.parser import HTMLParser
import xml.etree.ElementTree as ET
from xml.dom import minidom
from pathlib import Path
import json
import re
import os


# Namespace prefixes declared on the rdf:Description element.
_XMP_NAMESPACES = {
    "digiKam": "http://www.digikam.org/ns/1.0/",
    "MicrosoftPhoto": "http://ns.microsoft.com/photo/1.0",
    "lr": "http://ns.adobe.com/lightroom/1.0/",
    "mediapro": "http://ns.iview-multimedia.com/mediapro/1.0/",
    "acdsee": "http://ns.acdsee.com/iptc/1.0/",
    "dc": "http://purl.org/dc/elements/1.1/",
    "xmp": "http://ns.adobe.com/xap/1.0/",
    "exif": "http://ns.adobe.com/exif/1.0/",
}


class HTMLFilter(HTMLParser):
    """HTML parser that keeps only text content, one chunk per line."""

    # Accumulated text; every chunk of character data is followed by "\n".
    text = ""

    def handle_data(self, data):
        """Append one chunk of character data to :attr:`text`."""
        self.text += data + "\n"


def get_data(type, path):
    """Read a JSON metadata file and extract description, tags and date.

    Args:
        type: Site name selecting which keys are read: ``"bluesky"``,
            ``"twitter"``, ``"furaffinity"`` or ``"pixiv"``. Any other value
            yields an empty description and no tags. (The name shadows the
            builtin; it is kept so existing keyword callers keep working.)
        path: Location of the JSON file.

    Returns:
        A ``(desc, tags, date)`` tuple: the description text, the list of
        tags, and the ``"date"`` value (for pixiv, with ``"." + num``
        appended).

    Raises:
        KeyError: If a key required for the given site is missing.
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8 so non-ASCII text does not depend on the locale.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)

    desc = ""
    tags = []
    date = data["date"]

    if type == "bluesky":
        desc = data["text"]
    elif type == "twitter":
        desc = data["content"]
    elif type == "furaffinity":
        desc = data["title"] + "\n\n" + data["description"]
        tags = data["tags"]
        if tags:
            # NOTE(review): the first entry is discarded -- presumably it is
            # not a real tag; confirm against the metadata format.
            tags = tags[1:]
    elif type == "pixiv":
        f = HTMLFilter()
        f.feed(data["caption"])
        # close() flushes text the parser is still buffering; without it a
        # caption ending in an unterminated "&..." is never delivered.
        f.close()
        desc = data["title"] + "\n\n" + f.text
        tags = data["tags"]
        date += "." + str(data["num"])

    if type in ("bluesky", "twitter"):
        # These sites carry no tag list here; hashtags come from the text.
        tags = re.findall(r"#(\w+)", desc)

    return desc, tags, date


def _text_element(doc, name, text):
    """Create element *name* in *doc* containing a single text node."""
    node = doc.createElement(name)
    node.appendChild(doc.createTextNode(text))
    return node


def _build_xmp(desc, tags, date):
    """Return the XMP document text for one description/tags/date triple."""
    doc = minidom.Document()

    xmpmeta = doc.createElement("x:xmpmeta")
    xmpmeta.setAttribute("xmlns:x", "adobe:ns:meta/")
    xmpmeta.setAttribute("x:xmptk", "genxmp.py")
    doc.appendChild(xmpmeta)

    rdf = doc.createElement("rdf:RDF")
    rdf.setAttribute("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")

    rdf_desc = doc.createElement("rdf:Description")
    rdf_desc.setAttribute("rdf:about", "")
    rdf_desc.appendChild(_text_element(doc, "dc:description", desc))
    rdf_desc.appendChild(_text_element(doc, "exif:DateTimeOriginal", date))
    for prefix, url in _XMP_NAMESPACES.items():
        rdf_desc.setAttribute("xmlns:" + prefix, url)
    rdf.appendChild(rdf_desc)

    if tags:
        # NOTE(review): this emits 1 .. len(tags)-2 followed by 0; the intent
        # is not clear from this file, so the sequence is kept unchanged.
        conf_seq = doc.createElement("rdf:Seq")
        for i in list(range(1, len(tags) - 1)) + [0]:
            conf_seq.appendChild(_text_element(doc, "rdf:li", str(i)))
        exif_conf = doc.createElement("exif:ComponentsConfiguration")
        exif_conf.appendChild(conf_seq)
        rdf_desc.appendChild(exif_conf)

        tag_seq = doc.createElement("rdf:Seq")
        for tag in tags:
            tag_seq.appendChild(_text_element(doc, "rdf:li", tag))
        tag_list = doc.createElement("digiKam:TagsList")
        tag_list.appendChild(tag_seq)
        rdf_desc.appendChild(tag_list)

    xmpmeta.appendChild(rdf)

    # Drop the first line (the XML declaration) of the pretty-printed output.
    # NOTE(review): the text placed before and after the body is a bare
    # newline and an empty string -- confirm no wrapper text is missing.
    xml_str = '\n'
    xml_str += doc.toprettyxml(indent=" ").split("\n", 1)[1]
    xml_str += ''
    return xml_str


def main():
    """Write a ``.xmp`` file for every ``media/**/*.json`` lacking one."""
    count = 0
    for path in Path("media").rglob("*.json"):
        xmp_path = path.with_suffix(".xmp")
        if xmp_path.exists():
            continue

        # Second path component (media/<site>/...) names the source site;
        # using parts instead of splitting on "/" works on any platform.
        site = path.parts[1]
        desc, tags, date = get_data(site, path)
        xml_str = _build_xmp(desc, tags, date)

        count += 1
        with open(xmp_path, "w", encoding="utf-8") as f:
            f.write(xml_str)

    print(f"Generated {count} XMP files")


if __name__ == "__main__":
    main()