changeset 314:020d155a179d

Refactor EXIF handling and integrate it with XMP handling. With a disgusting hack we now also support EXIF in WebP.
author Matti Hamalainen <ccr@tnsp.org>
date Mon, 06 Apr 2020 19:05:13 +0300
parents d94b0ebe97c6
children 8a69e693e08c
files mgtool.php
diffstat 1 files changed, 166 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/mgtool.php	Mon Apr 06 18:37:49 2020 +0300
+++ b/mgtool.php	Mon Apr 06 19:05:13 2020 +0300
@@ -248,81 +248,191 @@
 
 
 //
-// Read XMP data block from given file
-// .. it's a horrible hack.
+// Read EXIF and XMP data from a file
 //
-function mgReadXMPFromRAWData($filename, $xmpBlockSize = 1024*64)
+// Returns TRUE on success, with the results stored in $exif and $xmp
+// (each is left FALSE when no such data was found). On error returns
+// a string describing the problem.
+// TODO XXX: Perhaps support XMP sidecar files?
+//
+function mgReadEXIFAndXMPData($filename, &$exif, &$xmp)
 {
+  $exif = FALSE;
+  $xmp = FALSE;
+
   if (($fh = @fopen($filename, 'rb')) === FALSE)
-    return FALSE;
+    return "Could not open file for reading.";
+
+  $fileData = fstat($fh);
+
+  // Probe the file for type (files too short to probe are not WebP)
+  $probeSize = 4 * 3;
+  if (($probeData = @fread($fh, $probeSize)) === FALSE)
+    return "Error reading file for type probe";
+
+  $probe = (strlen($probeData) == $probeSize) ?
+    unpack("a4magic/V1riffsize/a4riffid", $probeData) : FALSE;
 
-  $xmpStartTag = "<x:xmpmeta";
-  $xmpEndTag = "</x:xmpmeta>";
+  // Check for RIFF / WEBP
+  if ($probe !== FALSE &&
+      $probe["magic"] === "RIFF" && $probe["riffid"] === "WEBP")
+  {
+    if ($probe["riffsize"] > $fileData["size"])
+      return "Invalid WebP file, chunk size larger than file size";
+
+    $done = 0;
+    while (!feof($fh) && $done < 2)
+    {
+      // Read chunk header
+      if (($data = @fread($fh, 2 * 4)) === FALSE)
+        return "File read error in WebP RIFF chunk header";
+
+      // A short (or empty) read means end of file: no more chunks
+      if (strlen($data) < 2 * 4)
+        break;
+
+      // Chunk header is a FourCC id and a little-endian 32-bit size
+      $chunk = unpack("a4id/V1size", $data);
+      if ($chunk["size"] > $fileData["size"])
+        return "Invalid WebP file, chunk size larger than file size";
 
-  // Check for start tag
-  $buffer = "";
-  $xmpOK = FALSE;
-  while (!feof($fh))
-  {
-    if (($tmp = @fread($fh, $xmpBlockSize)) === FALSE)
-      return FALSE;
+      // Check for EXIF chunk (empty chunks are skipped like unknown ones)
+      if ($chunk["id"] === "EXIF" && $chunk["size"] > 0)
+      {
+        // exif_read_data() wants to seek to the start of its stream and
+        // probe from there, so it can not parse from the middle of $fh.
+        // Copy the chunk to a temporary file of its own as a workaround.
+        if (($tmpEXIF = @fread($fh, $chunk["size"])) === FALSE)
+          return "Error reading WebP EXIF chunk";
+
+        // Create a temporary file for the EXIF data
+        if (($tmpFile = @tmpfile()) === FALSE)
+          return "Could not create temporary WebP EXIF data file";
+
+        if ((@fwrite($tmpFile, $tmpEXIF, $chunk["size"])) === FALSE)
+        {
+          fclose($tmpFile);
+          return "Error writing WebP EXIF chunk to temporary file";
+        }
+
+        // Parse the EXIF from the temp file
+        $exif = @exif_read_data($tmpFile);
+        fclose($tmpFile);
 
-    $buffer .= $tmp;
-    if (($spos1 = strpos($buffer, "<")) !== FALSE)
-    {
-      $buffer = substr($buffer, $spos1);
-      if (($spos2 = strpos($buffer, $xmpStartTag)) !== FALSE)
+        $done++;
+      }
+      else
+      if ($chunk["id"] === "XMP " && $chunk["size"] > 0)
       {
-        $buffer = substr($buffer, $spos2);
-        $xmpOK = TRUE;
-        break;
+        // Read and parse XMP data chunk
+        if (($xmpStr = fread($fh, $chunk["size"])) === FALSE)
+          return "File read error in XMP data read";
+
+        $xmp = mgParseXMPData($xmpStr);
+
+        $done++;
+      }
+      else
+      {
+        // Skip other chunks
+        if (fseek($fh, $chunk["size"], SEEK_CUR) < 0)
+          return "File seek error in chunk skip";
+      }
+
+      // Odd-sized chunks are followed by one byte of padding, skip it
+      if ($chunk["size"] & 1)
+      {
+        if (fseek($fh, 1, SEEK_CUR) < 0)
+          return "File seek error in chunk skip";
       }
     }
-    else
-      $buffer = "";
+
+    return TRUE;
   }
+  else
+  {
+    // Other fileformats, e.g. JPEG, PNG, GIF, ..
+
+    // Read EXIF ..
+    if (fseek($fh, 0, SEEK_SET) < 0)
+      return "File seek error in EXIF fptr restore";
 
-  // Check for end tag if start tag was found
-  if ($xmpOK)
-  {
+    $exif = @exif_read_data($fh);
+
+    if (fseek($fh, 0, SEEK_SET) < 0)
+      return "File seek error in EXIF fptr restore";
+
+    // Read XMP data block from the file .. it's a horrible hack.
+    $xmpStartTag = "<x:xmpmeta";
+    $xmpEndTag = "</x:xmpmeta>";
+    $xmpBlockSize = 64 * 1024;
+
+    // Check for start tag
+    $buffer = "";
     $xmpOK = FALSE;
-    $buffer2 = $buffer;
-    do
+    while (!feof($fh))
     {
-      if (($spos1 = strpos($buffer2, "<")) !== FALSE)
+      if (($tmp = fread($fh, $xmpBlockSize)) === FALSE)
+        return "File read error in JPEG XMP read";
+
+      $buffer .= $tmp;
+      if (($spos1 = strpos($buffer, "<")) !== FALSE)
       {
-        $buffer2 = substr($buffer2, $spos1);
-        if (($spos2 = strpos($buffer2, $xmpEndTag)) !== FALSE)
+        $buffer = substr($buffer, $spos1);
+        if (($spos2 = strpos($buffer, $xmpStartTag)) !== FALSE)
         {
+          $buffer = substr($buffer, $spos2);
           $xmpOK = TRUE;
           break;
         }
       }
+      else
+        $buffer = "";
+    }
 
-      if (($tmp = @fread($fh, $xmpBlockSize)) !== FALSE)
-      {
-        $buffer2 .= $tmp;
-        $buffer .= $tmp;
-      }
-      else
-      {
-        $xmpOK = FALSE;
-        break;
-      }
-    } while (!$xmpOK);
-
+    // Check for end tag if start tag was found, giving up at end of file
     if ($xmpOK)
     {
-      if (($spos = strpos($buffer, $xmpEndTag)) !== FALSE)
-        $buffer = substr($buffer, 0, $spos + strlen($xmpEndTag));
-      else
-        $xmpOK = FALSE;
+      $xmpOK = FALSE;
+      $buffer2 = $buffer;
+      do
+      {
+        if (($spos1 = strpos($buffer2, "<")) !== FALSE)
+        {
+          $buffer2 = substr($buffer2, $spos1);
+          if (($spos2 = strpos($buffer2, $xmpEndTag)) !== FALSE)
+          {
+            $xmpOK = TRUE;
+            break;
+          }
+        }
+
+        if (($tmp = @fread($fh, $xmpBlockSize)) !== FALSE && $tmp !== "")
+        {
+          $buffer2 .= $tmp;
+          $buffer .= $tmp;
+        }
+        else
+        {
+          $xmpOK = FALSE;
+          break;
+        }
+      } while (!$xmpOK);
+
+      if ($xmpOK)
+      {
+        if (($spos = strpos($buffer, $xmpEndTag)) !== FALSE)
+          $buffer = substr($buffer, 0, $spos + strlen($xmpEndTag));
+        else
+          $xmpOK = FALSE;
+      }
     }
-  }
+
+    // Only a complete XMP packet is handed to the parser
+    $xmp = $xmpOK ? mgParseXMPData($buffer) : FALSE;
 
-  fclose($fh);
-
-  return $xmpOK ? $buffer : FALSE;
+    return TRUE;
+  }
 }
 
 
@@ -982,17 +1092,17 @@
           $updFlags |= GUPD_CAPTION;
 
         // Check for EXIF and XMP info
-        if ($updFlags & GUPD_EXIF_INFO)
+        if (($updFlags & GUPD_EXIF_INFO) &&
+            ($res = mgReadEXIFAndXMPData($efilename, $exif, $xmp)) === TRUE)
         {
-          // TODO XXX: Perhaps support XMP sidecar files
-          if (($xmpStr = mgReadXMPFromRAWData($efilename)) !== FALSE &&
-              ($xmp = mgParseXMPData($xmpStr)) !== FALSE)
+          if ($xmp !== FALSE)
           {
+            echo "@";
             foreach ($galExifConversions as $conv)
               mgCopyEntryData($edata, $xmp, $conv[GEC_TYPE], $conv[GEC_NAME], $conv[GEC_FIELDS]);
           }
 
-          if (($exif = @exif_read_data($efilename)) !== FALSE)
+          if ($exif !== FALSE)
           {
             echo "%";
             foreach ($galExifConversions as $conv)