127 lines
No EOL
3.4 KiB
Python
127 lines
No EOL
3.4 KiB
Python
#!/usr/bin/env python3
|
|
from html.parser import HTMLParser
|
|
import xml.etree.ElementTree as ET
|
|
from xml.dom import minidom
|
|
from pathlib import Path
|
|
import json
|
|
import re
|
|
import os
|
|
|
|
class HTMLFilter(HTMLParser):
    """HTML parser that keeps only the character data of what it is fed.

    Each chunk of text reported by the parser is appended to ``text``
    followed by a newline; tags themselves are not recorded.
    """

    # Class-level default; the first handle_data call rebinds it on the
    # instance, so parsers do not share accumulated text.
    text = ""

    def handle_data(self, data):
        """Append *data* and a trailing newline to the collected text."""
        self.text = f"{self.text}{data}\n"
|
|
|
|
def get_data(type, path):
    """Load one JSON metadata file and extract description, tags and date.

    Args:
        type: Name of the source site. ``"bluesky"``, ``"twitter"``,
            ``"furaffinity"`` and ``"pixiv"`` are handled; any other value
            yields an empty description and an empty tag list.
        path: Path of the JSON metadata file to read.

    Returns:
        A ``(desc, tags, date)`` tuple: the description text, the tags, and
        the value stored under ``"date"`` (for pixiv, with ``"."`` and the
        ``"num"`` value appended).

    Raises:
        KeyError: If ``"date"`` or a key needed for the given site is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # text encoding when decoding it.
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)

    desc = ""
    tags = []
    date = data["date"]

    if type == "bluesky":
        desc = data["text"]
    elif type == "twitter":
        desc = data["content"]
    elif type == "furaffinity":
        desc = data["title"] + "\n\n" + data["description"]
        tags = data["tags"]
        if tags:
            # The first tag is discarded; the remaining order is kept.
            del tags[0]
    elif type == "pixiv":
        f = HTMLFilter()
        f.feed(data["caption"])
        # close() makes the parser flush text it is still buffering (for
        # example a caption ending in an unterminated "&..."), which feed()
        # alone would leave out of f.text.
        f.close()
        desc = data["title"] + "\n\n" + f.text
        tags = data["tags"]
        date += "." + str(data["num"])

    if type in ("bluesky", "twitter"):
        # No tag list is read for these sites; hashtags in the text are used.
        tags = re.findall(r"#(\w+)", desc)

    return desc, tags, date
|
|
|
|
# Namespace prefixes declared on rdf:Description in every generated sidecar.
# Built once because the mapping is identical for every file.
namespaces = {
    "digiKam": "http://www.digikam.org/ns/1.0/",
    "MicrosoftPhoto": "http://ns.microsoft.com/photo/1.0",
    "lr": "http://ns.adobe.com/lightroom/1.0/",
    "mediapro": "http://ns.iview-multimedia.com/mediapro/1.0/",
    "acdsee": "http://ns.acdsee.com/iptc/1.0/",
    "dc": "http://purl.org/dc/elements/1.1/",
    "xmp": "http://ns.adobe.com/xap/1.0/",
    "exif": "http://ns.adobe.com/exif/1.0/",
}


def _text_element(doc, name, text):
    """Return a new element called *name* whose only child is *text*."""
    element = doc.createElement(name)
    element.appendChild(doc.createTextNode(text))
    return element


def _seq_element(doc, name, items):
    """Return element *name* wrapping an rdf:Seq with one rdf:li per item."""
    seq = doc.createElement("rdf:Seq")
    for item in items:
        seq.appendChild(_text_element(doc, "rdf:li", item))
    wrapper = doc.createElement(name)
    wrapper.appendChild(seq)
    return wrapper


# Walk every JSON metadata file below "media" and write an XMP sidecar next
# to it, unless a sidecar with that name already exists.
count = 0
for path in Path("media").rglob("*.json"):
    xmp_path = os.path.splitext(path)[0] + ".xmp"
    if os.path.exists(xmp_path):
        continue

    # The second path component (the entry directly under "media") selects
    # the site handling in get_data. path.parts does not depend on the
    # platform's path separator, unlike splitting str(path) on "/".
    site = path.parts[1]

    desc, tags, date = get_data(site, path)

    root = minidom.Document()
    xmpmeta = root.createElement("x:xmpmeta")
    xmpmeta.setAttribute("xmlns:x", "adobe:ns:meta/")
    xmpmeta.setAttribute("x:xmptk", "genxmp.py")
    root.appendChild(xmpmeta)

    rdf = root.createElement("rdf:RDF")
    rdf.setAttribute("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    xmpmeta.appendChild(rdf)

    rdf_desc = root.createElement("rdf:Description")
    rdf_desc.setAttribute("rdf:about", "")
    for prefix, url in namespaces.items():
        rdf_desc.setAttribute("xmlns:" + prefix, url)
    rdf.appendChild(rdf_desc)

    rdf_desc.appendChild(_text_element(root, "dc:description", desc))
    rdf_desc.appendChild(_text_element(root, "exif:DateTimeOriginal", date))

    if tags:
        # Emits 1 .. len(tags) - 2 followed by a final 0.
        # NOTE(review): the length of this sequence depends on the number of
        # tags -- confirm that is intended for exif:ComponentsConfiguration.
        components = [str(i) for i in range(1, len(tags) - 1)] + ["0"]
        rdf_desc.appendChild(
            _seq_element(root, "exif:ComponentsConfiguration", components)
        )
        rdf_desc.appendChild(_seq_element(root, "digiKam:TagsList", tags))

    # Replace the first output line (the XML declaration) with the xpacket
    # header and close the packet with the matching trailer.
    body = root.toprettyxml(indent=" ").split("\n", 1)[1]
    xml_str = (
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
        + body
        + '<?xpacket end="w"?>'
    )

    count += 1

    # The packet carries no XML declaration, so nothing in the file names an
    # encoding; write UTF-8 explicitly instead of the locale default.
    with open(xmp_path, "w", encoding="utf-8") as f:
        f.write(xml_str)

print(f"Generated {count} XMP files")