#!/usr/bin/env python3
"""Generate XMP sidecar files from per-post JSON metadata.

Every ``*.json`` file found below ``media/`` is read, and a sibling ``.xmp``
file holding the post description, date and tags is written next to it.
JSON files that already have a sidecar are skipped. The second path
component (``media/<site>/...``) selects how the JSON is interpreted.
"""
# Provenance: commit a99deee9cb20697cb0d3d24450f5f865b65f92a4 ("Add genxmp.py").
from html.parser import HTMLParser
import xml.etree.ElementTree as ET
from xml.dom import minidom
from pathlib import Path
import json
import re
import os

# Namespace prefixes declared on the rdf:Description element of every sidecar.
NAMESPACES = {
    "digiKam": "http://www.digikam.org/ns/1.0/",
    "MicrosoftPhoto": "http://ns.microsoft.com/photo/1.0",
    "lr": "http://ns.adobe.com/lightroom/1.0/",
    "mediapro": "http://ns.iview-multimedia.com/mediapro/1.0/",
    "acdsee": "http://ns.acdsee.com/iptc/1.0/",
    "dc": "http://purl.org/dc/elements/1.1/",
    "xmp": "http://ns.adobe.com/xap/1.0/",
    "exif": "http://ns.adobe.com/exif/1.0/",
}


class HTMLFilter(HTMLParser):
    """Collect the text content of an HTML fragment, one chunk per line."""

    # Starts as a class-level default; the first ``+=`` in handle_data
    # rebinds it on the instance, so parsers never share collected text.
    text = ""

    def handle_data(self, data):
        """Append a run of character data, followed by a newline."""
        self.text += data + "\n"


def get_data(type, path):
    """Extract description, tags and date from one JSON metadata file.

    Args:
        type: Site name selecting the JSON layout; one of ``"bluesky"``,
            ``"twitter"``, ``"furaffinity"`` or ``"pixiv"``. Any other value
            yields an empty description and no tags.
        path: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        A ``(desc, tags, date)`` tuple: the description text, the list of
        tags, and the value of the ``"date"`` key (for pixiv with
        ``"." + num`` appended).

    Raises:
        KeyError: If a key required for the given site is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)

    desc = ""
    tags = []
    date = data["date"]

    if type == "bluesky":
        desc = data["text"]
    elif type == "twitter":
        desc = data["content"]
    elif type == "furaffinity":
        desc = data["title"] + "\n\n" + data["description"]
        tags = data["tags"]
        if tags:
            # The first entry is discarded.
            # NOTE(review): the reason is not visible here; presumably it is
            # not a real tag - confirm against the data.
            tags = tags[1:]
    elif type == "pixiv":
        parser = HTMLFilter()
        parser.feed(data["caption"])
        # Without close(), trailing text that contains an unterminated "&"
        # (e.g. "R&D") stays in the parser's buffer and would be lost.
        parser.close()
        desc = data["title"] + "\n\n" + parser.text
        tags = data["tags"]
        date += "." + str(data["num"])

    if type in ("bluesky", "twitter"):
        # These sites carry no tag list here; use the hashtags in the text.
        tags = re.findall(r"#(\w+)", desc)

    return desc, tags, date


def build_xmp(desc, tags, date):
    """Return the XMP sidecar document for one post as a string.

    Args:
        desc: Text stored in ``dc:description``.
        tags: Tags stored in ``digiKam:TagsList``; when empty or ``None``
            no tag-related elements are written.
        date: Text stored in ``exif:DateTimeOriginal``.

    Returns:
        Pretty-printed XML without the XML declaration line, starting with
        an empty line.
    """
    doc = minidom.Document()

    xmpmeta = doc.createElement("x:xmpmeta")
    xmpmeta.setAttribute("xmlns:x", "adobe:ns:meta/")
    xmpmeta.setAttribute("x:xmptk", "genxmp.py")
    doc.appendChild(xmpmeta)

    rdf = doc.createElement("rdf:RDF")
    rdf.setAttribute(
        "xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    )
    rdf_desc = doc.createElement("rdf:Description")
    rdf_desc.setAttribute("rdf:about", "")

    description = doc.createElement("dc:description")
    description.appendChild(doc.createTextNode(desc))
    rdf_desc.appendChild(description)

    exif_date = doc.createElement("exif:DateTimeOriginal")
    exif_date.appendChild(doc.createTextNode(date))
    rdf_desc.appendChild(exif_date)

    for prefix, url in NAMESPACES.items():
        rdf_desc.setAttribute("xmlns:" + prefix, url)

    rdf.appendChild(rdf_desc)

    if tags:
        # NOTE(review): this sequence is sized from the number of tags
        # (1 .. len(tags) - 2, then 0). That is kept exactly as written, but
        # a components configuration is unrelated to tags - confirm intent.
        exif_conf = doc.createElement("exif:ComponentsConfiguration")
        exif_seq = doc.createElement("rdf:Seq")
        for value in [*range(1, len(tags) - 1), 0]:
            item = doc.createElement("rdf:li")
            item.appendChild(doc.createTextNode(str(value)))
            exif_seq.appendChild(item)
        exif_conf.appendChild(exif_seq)
        rdf_desc.appendChild(exif_conf)

        tag_list = doc.createElement("digiKam:TagsList")
        tag_seq = doc.createElement("rdf:Seq")
        for tag in tags:
            item = doc.createElement("rdf:li")
            item.appendChild(doc.createTextNode(tag))
            tag_seq.appendChild(item)
        tag_list.appendChild(tag_seq)
        rdf_desc.appendChild(tag_list)

    xmpmeta.appendChild(rdf)

    # Drop the first line of the pretty-printed output (the XML declaration)
    # and keep the leading newline the sidecars have always started with.
    return "\n" + doc.toprettyxml(indent=" ").split("\n", 1)[1]


def main():
    """Write a sidecar for every JSON file under ``media/`` lacking one."""
    count = 0
    for path in Path("media").rglob("*.json"):
        sidecar = path.with_suffix(".xmp")
        if sidecar.exists():
            continue

        # media/<site>/...: use path components, not the separator
        # character, so this also works where paths use backslashes.
        site = path.parts[1]

        desc, tags, date = get_data(site, path)
        xml_str = build_xmp(desc, tags, date)

        # Descriptions and tags may contain non-ASCII text; write UTF-8
        # rather than whatever the locale default happens to be.
        with open(sidecar, "w", encoding="utf-8") as f:
            f.write(xml_str)
        count += 1

    print(f"Generated {count} XMP files")


if __name__ == "__main__":
    main()