Add new rarpd-dx project.

[homepage.git] / lib / xhtml-compat.rb
diff --git a/lib/xhtml-compat.rb b/lib/xhtml-compat.rb

index f69c1eeaad19df9320b1655b30c64297070c30b4..c110502ceee520bab6c2e0c435f222dc67a25760 100644 (file)
--- a/lib/xhtml-compat.rb
+++ b/lib/xhtml-compat.rb
@@ -1,7 +1,7 @@
-# Nick's web site: xhtml_compat filter.  Add whitespace before the end
-# of empty element tags to improve compatibility with old browsers.
+# Nick's web site: xhtml_compat filter.  Perform fixups to improve
+# XHTML compatibility with various user agents.
  #
-# Copyright © 2020 Nick Bowler
+# Copyright © 2019-2021 Nick Bowler
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -18,8 +18,57 @@
  
  class XhtmlCompatFilter < Nanoc::Filter
      identifier :xhtml_compat
+    requires 'nokogiri'
+
+    Xmlns = {
+        math: 'http://www.w3.org/1998/Math/MathML',
+        svg: 'http://www.w3.org/2000/svg',
+    }.freeze
+
+    XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'
+    MathPublic  = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'
+    MathSystem  = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'
+    SVGPublic   = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'
+    SVGSystem   = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'
+
+    # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
+    # on content.  When using MathML or SVG elements in XHTML a different
+    # doctype is needed: select one based on which elements are present.
+    def fix_doctype(content, params = {})
+        return "#{content}" if not params[:fix_doctype]
+
+        doc = Nokogiri::XML(content)
+        doctype = doc.internal_subset
+
+        return "#{content}" if doctype.external_id != XHTMLPublic
+
+        if not doc.xpath("//svg:svg", Xmlns).empty?
+            doctype.remove
+            doc.create_internal_subset("html", SVGPublic, SVGSystem)
+        elsif not doc.xpath("//math:math", Xmlns).empty?
+            doctype.remove
+            doc.create_internal_subset("html", MathPublic, MathSystem)
+        end
+
+        return doc.to_xml
+    end
  
      def run(content, params = {})
-        return content.gsub(/([^[:space:]])\/>/m, '\1 />');
+        text = fix_doctype(content, params)
+
+        # Old versions of Netscape get confused by <hr/> but have no problem
+        # with <hr />, so avoid that by adding spaces to such elements.
+        text.gsub!(/([^[:space:]])\/>/m, '\1 />');
+
+        # Even older versions of Netscape interpret any script as JavaScript,
+        # which causes major problems with the CDATA hack; solve that by making
+        # the whole thing look like a JavaScript comment.
+        text.gsub!("<![CDATA[]]x><!--]]>", '/*\&')
+        text.gsub!("<![CDATA[-->]]>", '\&*/')
+
+        # Delete any zero-width word joiners added for XSLT processing.
+        text.delete! "\u2060"
+
+        return text
      end
  end