changeset 279:54a54921426c

Add functions for extracting XMP information from files and parsing the XMP to get out some relevant fields of data not present in EXIF tags.
author Matti Hamalainen <ccr@tnsp.org>
date Thu, 16 May 2019 21:45:59 +0300
parents 6770ef8b3575
children 8297c895f22e
files mgtool.php
diffstat 1 files changed, 130 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mgtool.php	Thu May 16 17:59:18 2019 +0300
+++ b/mgtool.php	Thu May 16 21:45:59 2019 +0300
@@ -27,6 +27,7 @@
   [ MG_DATE,  "datetime"    , "DateTimeOriginal" ],
   [ MG_DATE,  "datetime"    , "DateTimeDigitized" ],
   [ MG_INT,   "filesize"    , "FileSize" ],
+  [ MG_STR,   "keywords"    , "keywords" ],
 ];
 
 
@@ -210,6 +211,125 @@
 
 
 //
+// Read the raw XMP packet (<x:xmpmeta ...> ... </x:xmpmeta>) from given file
+// .. a horrible hack: the file is scanned for the tags as plain bytes,
+// its actual container format is not parsed at all.
+//
+// $filename     = name of the file to scan
+// $xmpBlockSize = number of bytes to request per fread() call
+//
+// Returns the packet as a string, from the first start tag found through
+// the first end tag following it, or FALSE if the file can't be read or
+// does not contain a complete packet.
+//
+function mgReadXMPData($filename, $xmpBlockSize = 1024*64)
+{
+  // fread() can't be asked for less than one byte at a time,
+  // treat such a block size like any other failure to read
+  if ($xmpBlockSize < 1)
+    return FALSE;
+
+  if (($fh = @fopen($filename, 'rb')) === FALSE)
+    return FALSE;
+
+  $xmpStartTag = "<x:xmpmeta";
+  $xmpEndTag = "</x:xmpmeta>";
+
+  // Amount of unmatched data to retain between reads, so that
+  // a start tag split across two reads can still be found
+  $keepLen = strlen($xmpStartTag) - 1;
+
+  $buffer = "";
+  $xmpFound = FALSE;
+  $xmpData = FALSE;
+
+  while (!feof($fh))
+  {
+    // At end of file fread() returns an empty string instead of FALSE,
+    // so both have to end the loop, or a packet with no end tag would
+    // keep us spinning here forever
+    $tmp = @fread($fh, $xmpBlockSize);
+    if ($tmp === FALSE || $tmp === "")
+      break;
+
+    $buffer .= $tmp;
+
+    // Check for start tag
+    if (!$xmpFound)
+    {
+      if (($spos = strpos($buffer, $xmpStartTag)) !== FALSE)
+      {
+        // Drop everything preceding the packet
+        $buffer = substr($buffer, $spos);
+        $xmpFound = TRUE;
+      }
+      else if (strlen($buffer) > $keepLen)
+      {
+        // Nothing yet, and no reason to let the buffer
+        // grow to the size of the whole file
+        $buffer = substr($buffer, -$keepLen);
+      }
+    }
+
+    // Check for end tag if start tag was found
+    if ($xmpFound &&
+        ($epos = strpos($buffer, $xmpEndTag)) !== FALSE)
+    {
+      // Cut off whatever follows the packet
+      $xmpData = substr($buffer, 0, $epos + strlen($xmpEndTag));
+      break;
+    }
+  }
+
+  // All exits after a successful fopen() come through here,
+  // so the handle is closed on read failures as well
+  fclose($fh);
+
+  return $xmpData;
+}
+
+
+//
+// Parse an XMP packet string and extract the fields we are interested in.
+//
+// $xmpStr = XMP packet as XML, e.g. the string returned by mgReadXMPData()
+//
+// Returns FALSE if the string can't be parsed as XML, otherwise an array
+// where "keywords" is the list of the dc:subject items as strings
+// (an empty list when the packet has no such items).
+//
+function mgParseXMPData($xmpStr)
+{
+  // Parse XML to a SimpleXMLElement structure
+  if (($xmpOb = @simplexml_load_string($xmpStr)) === FALSE)
+    return FALSE;
+
+  // Bind the prefixes used in the XPath query below to the namespace URIs,
+  // so the match does not depend on which prefixes the XMP writer picked
+  // and the XML does not need to be mangled with regexes beforehand
+  // (which also used to strip "word:" sequences from the keyword texts).
+  $xmpOb->registerXPathNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+  $xmpOb->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/");
+
+  // Process structure to a simple flat array of the desired elements only
+  $xmpData = [];
+
+  // Keywords are list items in a rdf:Bag/Alt/Seq container under dc:subject
+  $xres = $xmpOb->xpath(
+    "rdf:RDF/rdf:Description/dc:subject".
+    "/*[self::rdf:Bag or self::rdf:Alt or self::rdf:Seq]/rdf:li");
+
+  // xpath() gives an array on success (empty if nothing matched)
+  if (is_array($xres))
+  {
+    $xmpData["keywords"] = array_map(function($xkw) { return (string) $xkw; }, $xres);
+  }
+
+  return $xmpData;
+}
+
+
+//
 // Converts one value (mainly from EXIF tag information)
 // by doing explicit type casting and special conversions.
 //
@@ -723,6 +843,16 @@
           mgNeedUpdate($galEntry, "mtime", filemtime($capFilename)))
           $updFlags |= GUPD_CAPTION;
 
+        // Check for XMP info
+        // TODO XXX: Support XMP sidecar files
+        if (($updFlags & GUPD_EXIF_INFO) &&
+            ($xmpStr = mgReadXMPData($efilename)) !== FALSE &&
+            ($xmp = mgParseXMPData($xmpStr)) !== FALSE)
+        {
+          foreach ($galExifConversions as $conv)
+            mgCopyEntryData($edata, $xmp, $conv[0], $conv[1], $conv[2]);
+        }
+
         // Check for EXIF info
         if (($updFlags & GUPD_EXIF_INFO) &&
             ($exif = @exif_read_data($efilename)) !== FALSE)