Mercurial > hg > mgallery
changeset 279:54a54921426c
Add functions for extracting XMP data from files and for parsing that XMP
to obtain relevant data fields that are not present in EXIF tags.
author | Matti Hamalainen <ccr@tnsp.org> |
---|---|
date | Thu, 16 May 2019 21:45:59 +0300 |
parents | 6770ef8b3575 |
children | 8297c895f22e |
files | mgtool.php |
diffstat | 1 files changed, 130 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/mgtool.php Thu May 16 17:59:18 2019 +0300 +++ b/mgtool.php Thu May 16 21:45:59 2019 +0300 @@ -27,6 +27,7 @@ [ MG_DATE, "datetime" , "DateTimeOriginal" ], [ MG_DATE, "datetime" , "DateTimeDigitized" ], [ MG_INT, "filesize" , "FileSize" ], + [ MG_STR, "keywords" , "keywords" ], ]; @@ -210,6 +211,125 @@ // +// Read and parse XMP data from given file +// .. a horrible hack. +// +function mgReadXMPData($filename, $xmpBlockSize = 1024*64) +{ + if (($fh = @fopen($filename, 'rb')) === FALSE) + return FALSE; + + $xmpStartTag = "<x:xmpmeta"; + $xmpEndTag = "</x:xmpmeta>"; + + // Check for start tag + $buffer = ""; + $xmpOK = FALSE; + while (!feof($fh)) + { + if (($tmp = @fread($fh, $xmpBlockSize)) === FALSE) + return FALSE; + + $buffer .= $tmp; + if (($spos1 = strpos($buffer, "<")) !== FALSE) + { + $buffer = substr($buffer, $spos1); + if (($spos2 = strpos($buffer, $xmpStartTag)) !== FALSE) + { + $buffer = substr($buffer, $spos2); + $xmpOK = TRUE; + break; + } + } + else + $buffer = ""; + } + + // Check for end tag if start tag was found + if ($xmpOK) + { + $xmpOK = FALSE; + $buffer2 = $buffer; + do + { + if (($spos1 = strpos($buffer2, "<")) !== FALSE) + { + $buffer2 = substr($buffer2, $spos1); + if (($spos2 = strpos($buffer2, $xmpEndTag)) !== FALSE) + { + $xmpOK = TRUE; + break; + } + } + + if (($tmp = @fread($fh, $xmpBlockSize)) !== FALSE) + { + $buffer2 .= $tmp; + $buffer .= $tmp; + } + else + { + $xmpOK = FALSE; + break; + } + } while (!$xmpOK); + + if ($xmpOK) + { + if (($spos = strpos($buffer, $xmpEndTag)) !== FALSE) + $buffer = substr($buffer, 0, $spos + strlen($xmpEndTag)); + else + $xmpOK = FALSE; + } + } + + fclose($fh); + + return $xmpOK ? 
$buffer : FALSE; +} + + +function mgParseXMPData($xmpStr) +{ + // SimpleXML apparently can't handle namespaces, + // so we will crudely remove them with some regexes + $xmpPatterns = + [ + "/[a-zA-Z]+:/", + "/\/[a-zA-Z]+:/", + "/<\/?(Bag|Alt|Seq)>/" + ]; + + $xmpReplacements = + [ + "", + "\/", + "", + ]; + + $xmpStr = preg_replace($xmpPatterns, $xmpReplacements, $xmpStr); + + // Parse XML to a SimpleXMLElement structure + if (($xmpOb = @simplexml_load_string($xmpStr)) === FALSE) + return FALSE; + + // Process structure to simple flat array of data + // for the desired elements only + $xmpData = []; + + //if (($tmp = $xmpOb->xpath("RDF/Description/description/li")) !== FALSE) + // $xmpData["description"] = (string) $xe[0]; + + if (($xres = $xmpOb->xpath("RDF/Description/subject/li")) !== FALSE) + { + $xmpData["keywords"] = array_map(function($xkw) { return (string) $xkw; }, $xres); + } + + return $xmpData; +} + + +// // Converts one value (mainly from EXIF tag information) // by doing explicit type casting and special conversions. // @@ -723,6 +843,16 @@ mgNeedUpdate($galEntry, "mtime", filemtime($capFilename))) $updFlags |= GUPD_CAPTION; + // Check for XMP info + // TODO XXX: Support XMP sidecar files + if (($updFlags & GUPD_EXIF_INFO) && + ($xmpStr = mgReadXMPData($efilename)) !== FALSE && + ($xmp = mgParseXMPData($xmpStr)) !== FALSE) + { + foreach ($galExifConversions as $conv) + mgCopyEntryData($edata, $xmp, $conv[0], $conv[1], $conv[2]); + } + // Check for EXIF info if (($updFlags & GUPD_EXIF_INFO) && ($exif = @exif_read_data($efilename)) !== FALSE)