X-Git-Url: https://git.draconx.ca/gitweb/homepage.git/blobdiff_plain/5aa756ee68dc2134b6acac8a66af2f1c14521721..0015d84bea1204b4534e5568ff7c0920b9ef02b7:/lib/xhtml-compat.rb

diff --git a/lib/xhtml-compat.rb b/lib/xhtml-compat.rb
index 25fe2e2..c110502 100644
--- a/lib/xhtml-compat.rb
+++ b/lib/xhtml-compat.rb
@@ -1,7 +1,7 @@
-# Nick's web site: xhtml_compat filter.  Add whitespace before the end
-# of empty element tags to improve compatibility with old browsers.
+# Nick's web site: xhtml_compat filter.  Perform fixups to improve
+# XHTML compatibility with various user agents.
 #
-# Copyright © 2020 Nick Bowler
+# Copyright © 2019-2021 Nick Bowler
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,11 +18,57 @@
 
 class XhtmlCompatFilter < Nanoc::Filter
     identifier :xhtml_compat
+    requires 'nokogiri'
+
+    Xmlns = {
+        math: 'http://www.w3.org/1998/Math/MathML',
+        svg: 'http://www.w3.org/2000/svg',
+    }.freeze
+
+    XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'
+    MathPublic  = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'
+    MathSystem  = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'
+    SVGPublic   = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'
+    SVGSystem   = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'
+
+    # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
+    # on content.  When using MathML or SVG elements in XHTML a different
+    # doctype is needed: select one based on which elements are present.
+    def fix_doctype(content, params = {})
+        return "#{content}" if not params[:fix_doctype]
+
+        doc = Nokogiri::XML(content)
+        doctype = doc.internal_subset
+
+        return "#{content}" if doctype.external_id != XHTMLPublic
+
+        if not doc.xpath("//svg:svg", Xmlns).empty?
+            doctype.remove
+            doc.create_internal_subset("html", SVGPublic, SVGSystem)
+        elsif not doc.xpath("//math:math", Xmlns).empty?
+            doctype.remove
+            doc.create_internal_subset("html", MathPublic, MathSystem)
+        end
+
+        return doc.to_xml
+    end
 
     def run(content, params = {})
-        text = content.gsub(/([^[:space:]])\/>/m, '\1 />');
+        text = fix_doctype(content, params)
+
+        # Old versions of Netscape get confused by <hr/> but have no problem
+        # with <hr />, so avoid that by adding spaces to such elements.
+        text.gsub!(/([^[:space:]])\/>/m, '\1 />');
+
+        # Even older versions of Netscape interpret any script as Javascript,
+        # which causes major problems with the CDATA hack; solve that by making
+        # the whole thing look like a Javascript comment.
         text.gsub!("<![CDATA[]]x><!--]]>", '/*\&')
         text.gsub!("<![CDATA[-->]]>", '\&*/')
+
+        # Delete any zero-width word joiners added for XSLT processing.
+        text.delete! "\u2060"
+
         return text
     end
 end