# HG changeset patch # User Matti Hamalainen # Date 1586189113 -10800 # Node ID 020d155a179df17ee69114e727c4c2f70cf002ae # Parent d94b0ebe97c6d23c28bd71b6f1e3863d5c1bd1bf Refactor EXIF handling and integrate it with XMP handling. With a disgusting hack we now also support EXIF in WebP. diff -r d94b0ebe97c6 -r 020d155a179d mgtool.php --- a/mgtool.php Mon Apr 06 18:37:49 2020 +0300 +++ b/mgtool.php Mon Apr 06 19:05:13 2020 +0300 @@ -248,81 +248,191 @@ // -// Read XMP data block from given file -// .. it's a horrible hack. +// Read EXIF and XMP data from a file // -function mgReadXMPFromRAWData($filename, $xmpBlockSize = 1024*64) +// TODO XXX: Perhaps support XMP sidecar files? +// +function mgReadEXIFAndXMPData($filename, &$exif, &$xmp) { + $exif = FALSE; + $xmp = FALSE; + if (($fh = @fopen($filename, 'rb')) === FALSE) - return FALSE; + return "Could not open file for reading."; + + $fileData = fstat($fh); + + // Probe the file for type + $probeSize = 4 * 3; + if (($probeData = @fread($fh, $probeSize)) === FALSE) + return "Error reading file for type probe"; + + $probe = unpack("C4magic/L1riffsize/c4riffid", $probeData); - $xmpStartTag = " $fileData["size"]) + return "Invalid WebP file, chunk size larger than file size"; + + $done = 0; + while (!feof($fh) && $done < 2) + { + // Read chunk header + if (($data = @fread($fh, 2 * 4)) == FALSE) + return "File read error in WebP RIFF chunk header"; + + $chunk = unpack("c4id/L1size", $data); + + /* + printf("chunk: '%c%c%c%c' (%02x %02x %02x %02x) osize=%d\n", + $chunk["id1"], $chunk["id2"], $chunk["id3"], $chunk["id4"], + $chunk["id1"], $chunk["id2"], $chunk["id3"], $chunk["id4"], + $chunk["size"]); + */ - // Check for start tag - $buffer = ""; - $xmpOK = FALSE; - while (!feof($fh)) - { - if (($tmp = @fread($fh, $xmpBlockSize)) === FALSE) - return FALSE; + // Check for EXIF chunk + if ($chunk["id1"] == 0x45 && $chunk["id2"] == 0x58 && + $chunk["id3"] == 0x49 && $chunk["id4"] == 0x46) + { + // This is an incredibly stupid hack to work around the + // fact that PHP's exif_read_data() wants to seek to stream + // start and probe things .. if we just had a direct parser + // function we would not need this shit. + if (($tmpEXIF = @fread($fh, $chunk["size"])) === FALSE) + return "Error reading WebP EXIF chunk"; + + // Create a temporary file for the EXIF data + if (($tmpFile = @tmpfile()) === FALSE) + return "Could not create temporary WebP EXIF data file"; + + if ((@fwrite($tmpFile, $tmpEXIF, $chunk["size"])) === FALSE) + { + fclose($tmpFile); + return "Error writing WebP EXIT chunk to temporary file"; + } + + // Parse the EXIF from the temp file + $exif = @exif_read_data($tmpFile); + fclose($tmpFile); - $buffer .= $tmp; - if (($spos1 = strpos($buffer, "<")) !== FALSE) - { - $buffer = substr($buffer, $spos1); - if (($spos2 = strpos($buffer, $xmpStartTag)) !== FALSE) + $done++; + } + else + if ($chunk["id1"] == 0x58 && $chunk["id2"] == 0x4d && + $chunk["id3"] == 0x50 && $chunk["id4"] == 0x20) { - $buffer = substr($buffer, $spos2); - $xmpOK = TRUE; - break; + // Read and parse XMP data chunk + if (($xmpStr = fread($fh, $chunk["size"])) === FALSE) + return "File read error in XMP data read"; + + $xmp = mgParseXMPData($xmpStr); + + $done++; + } + else + { + // Skip other chunks + if (fseek($fh, $chunk["size"], SEEK_CUR) < 0) + return "File seek error in chunk skip"; + } + + // If the chunk size is not aligned, skip one byte + if ($chunk["size"] & 1) + { + if (fseek($fh, 1, SEEK_CUR) < 0) + return "File seek error in chunk skip"; } } - else - $buffer = ""; + + return TRUE; } + else + { + // Other fileformats, e.g. JPEG, PNG, GIF, .. + + // Read EXIF .. + if (fseek($fh, 0, SEEK_SET) < 0) + return "File seek error in EXIF fptr restore"; - // Check for end tag if start tag was found - if ($xmpOK) - { + $exif = @exif_read_data($fh); + + if (fseek($fh, 0, SEEK_SET) < 0) + return "File seek error in EXIF fptr restore"; + + // Read XMP data block from the file .. it's a horrible hack. + $xmpStartTag = "