gists/genxmp.py

#!/usr/bin/env python3
from html.parser import HTMLParser
import xml.etree.ElementTree as ET
from xml.dom import minidom
from pathlib import Path
import json
import re
import os

class HTMLFilter(HTMLParser):
    """HTML parser that keeps only character data, discarding all markup.

    Each chunk of text reported by the parser is appended to ``text`` with a
    trailing newline.
    """

    # Starts as an empty class-level default; the first append rebinds the
    # attribute on the instance, so parsers do not share collected text.
    text = ""

    def handle_data(self, data):
        """Append one chunk of character data, newline-terminated, to ``text``."""
        self.text = "".join((self.text, data, "\n"))

def get_data(type, path):
    """Load one JSON metadata file and extract description, tags and date.

    Args:
        type: Name of the source site. Recognised values are ``"bluesky"``,
            ``"twitter"``, ``"furaffinity"`` and ``"pixiv"``; any other value
            yields an empty description and no tags.
        path: Path of the JSON file to read.

    Returns:
        A ``(desc, tags, date)`` tuple: the description text, the list of
        tags, and the value of the ``"date"`` key (with ``"." + num``
        appended for pixiv entries).

    Raises:
        KeyError: If a key required for the given ``type`` is missing.
    """
    # JSON is UTF-8 encoded; do not depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as file:
        data = json.load(file)

    desc = ""
    tags = []
    date = data["date"]

    if type in ("bluesky", "twitter"):
        desc = data["text"] if type == "bluesky" else data["content"]
        # These sources carry no tag list; use the hashtags found in the text.
        tags = re.findall(r"#(\w+)", desc)
    elif type == "furaffinity":
        desc = data["title"] + "\n\n" + data["description"]
        tags = data["tags"]
        if tags:
            # The first entry is not kept as a tag.
            tags = tags[1:]
    elif type == "pixiv":
        html_filter = HTMLFilter()
        html_filter.feed(data["caption"])
        desc = data["title"] + "\n\n" + html_filter.text
        tags = data["tags"]
        # NOTE(review): presumably "num" is the image index within a
        # multi-image post, used to keep dates distinct - confirm.
        date += "." + str(data["num"])

    return desc, tags, date

# Namespace prefixes declared on every rdf:Description element.
XMP_NAMESPACES = {
    "digiKam": "http://www.digikam.org/ns/1.0/",
    "MicrosoftPhoto": "http://ns.microsoft.com/photo/1.0",
    "lr": "http://ns.adobe.com/lightroom/1.0/",
    "mediapro": "http://ns.iview-multimedia.com/mediapro/1.0/",
    "acdsee": "http://ns.acdsee.com/iptc/1.0/",
    "dc": "http://purl.org/dc/elements/1.1/",
    "xmp": "http://ns.adobe.com/xap/1.0/",
    "exif": "http://ns.adobe.com/exif/1.0/",
}


def build_xmp(desc, tags, date):
    """Return the text of an XMP packet for one media item.

    Args:
        desc: Text stored in ``dc:description``.
        tags: Tags stored in ``digiKam:TagsList``; when empty, no tag
            elements are written at all.
        date: Text stored in ``exif:DateTimeOriginal``.

    Returns:
        The pretty-printed XML wrapped in ``<?xpacket ...?>`` processing
        instructions, without an XML declaration.
    """
    doc = minidom.Document()
    meta = doc.createElement("x:xmpmeta")
    meta.setAttribute("xmlns:x", "adobe:ns:meta/")
    meta.setAttribute("x:xmptk", "genxmp.py")
    doc.appendChild(meta)

    rdf = doc.createElement("rdf:RDF")
    rdf.setAttribute("xmlns:rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    rdf_desc = doc.createElement("rdf:Description")
    rdf_desc.setAttribute("rdf:about", "")
    for prefix, uri in XMP_NAMESPACES.items():
        rdf_desc.setAttribute("xmlns:" + prefix, uri)

    description = doc.createElement("dc:description")
    description.appendChild(doc.createTextNode(desc))
    rdf_desc.appendChild(description)

    original_date = doc.createElement("exif:DateTimeOriginal")
    original_date.appendChild(doc.createTextNode(date))
    rdf_desc.appendChild(original_date)

    rdf.appendChild(rdf_desc)

    if tags:
        # NOTE(review): this sequence (1 .. len(tags) - 2, then 0) is derived
        # from the number of tags, which looks unrelated to what
        # exif:ComponentsConfiguration describes. Output is kept as-is;
        # confirm whether the element is needed at all.
        components = doc.createElement("exif:ComponentsConfiguration")
        component_seq = doc.createElement("rdf:Seq")
        for value in list(range(1, len(tags) - 1)) + [0]:
            item = doc.createElement("rdf:li")
            item.appendChild(doc.createTextNode(str(value)))
            component_seq.appendChild(item)
        components.appendChild(component_seq)
        rdf_desc.appendChild(components)

        tag_list = doc.createElement("digiKam:TagsList")
        tag_seq = doc.createElement("rdf:Seq")
        for tag in tags:
            item = doc.createElement("rdf:li")
            item.appendChild(doc.createTextNode(tag))
            tag_seq.appendChild(item)
        tag_list.appendChild(tag_seq)
        rdf_desc.appendChild(tag_list)

    meta.appendChild(rdf)

    # Drop the first line (the XML declaration emitted by toprettyxml) and
    # wrap the remainder in the xpacket header and trailer.
    body = doc.toprettyxml(indent=" ").split("\n", 1)[1]
    return (
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
        + body
        + '<?xpacket end="w"?>'
    )


# Write an .xmp file next to every JSON metadata file that does not have one.
count = 0
for path in Path("media").rglob("*.json"):
    xmp_path = path.with_suffix(".xmp")
    if xmp_path.exists():
        continue

    # The directory directly below "media" names the source site. Path.parts
    # is used so this also works where the separator is not "/".
    site = path.parts[1]
    desc, tags, date = get_data(site, path)

    # Written as UTF-8 explicitly so non-ASCII descriptions and tags do not
    # depend on the platform's locale encoding.
    xmp_path.write_text(build_xmp(desc, tags, date), encoding="utf-8")
    count += 1

print("Generated %s XMP files" % count)