-# Nick's web site: xhtml_compat filter. Add whitespace before the end
-# of empty element tags to improve compatibility with old browsers.
+# Nick's web site: xhtml_compat filter. Perform fixups to improve
+# XHTML compatibility with various user agents.
#
-# Copyright © 2020 Nick Bowler
+# Copyright © 2019-2021 Nick Bowler
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# Nanoc filter (identifier :xhtml_compat) that post-processes serialized
# XHTML text: it optionally swaps the doctype when SVG or MathML elements are
# present, then applies a few textual fixups for old user agents.
class XhtmlCompatFilter < Nanoc::Filter
  identifier :xhtml_compat
  requires 'nokogiri'

  # Namespace prefix bindings for the XPath queries in fix_doctype.
  Xmlns = {
    math: 'http://www.w3.org/1998/Math/MathML',
    svg: 'http://www.w3.org/2000/svg',
  }.freeze

  # Public identifier that marks a document as eligible for doctype fixing.
  XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'.freeze

  # Replacement doctype identifiers for documents containing MathML.
  MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'.freeze
  MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'.freeze

  # Replacement doctype identifiers for documents containing SVG.
  SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'.freeze
  SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'.freeze

  # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
  # on content.  When using MathML or SVG elements in XHTML a different
  # doctype is needed: select one based on which elements are present.
  #
  # content - the document text to inspect.
  # params  - filter parameters; nothing is done unless params[:fix_doctype]
  #           is truthy.
  #
  # Always returns a new string (never content itself), so the caller may
  # modify the result in place.  The text is returned unchanged when fixing
  # is disabled, when the document has no doctype, or when its doctype's
  # public identifier is not XHTMLPublic.  Otherwise the document is
  # re-serialized by Nokogiri, with the SVG doctype if an svg:svg element is
  # present, else the MathML doctype if a math:math element is present, else
  # the original doctype.
  def fix_doctype(content, params = {})
    return content.to_s.dup unless params[:fix_doctype]

    doc = Nokogiri::XML(content)
    doctype = doc.internal_subset

    # internal_subset is nil for a document with no doctype at all; such a
    # document is left alone, just like one with an unrecognized doctype.
    return content.to_s.dup unless doctype && doctype.external_id == XHTMLPublic

    # SVG takes precedence: its doctype is the one that also names MathML.
    replacement =
      if doc.at_xpath('//svg:svg', Xmlns)
        [SVGPublic, SVGSystem]
      elsif doc.at_xpath('//math:math', Xmlns)
        [MathPublic, MathSystem]
      end

    if replacement
      # The old doctype node is removed before the new one is created.
      doctype.remove
      doc.create_internal_subset('html', *replacement)
    end

    doc.to_xml
  end

  # Filter entry point: apply the optional doctype fix, then the textual
  # compatibility fixups, and return the resulting string.
  #
  # content - the document text to filter.
  # params  - filter parameters, passed through to fix_doctype.
  def run(content, params = {})
    # fix_doctype always hands back a fresh string, so the in-place edits
    # below never touch the caller's content.
    text = fix_doctype(content, params)

    # Old versions of Netscape get confused by <hr/> but have no problem
    # with <hr />, so avoid that by adding spaces to such elements.
    text.gsub!(%r{([^[:space:]])/>}m, '\1 />')

    # Even older versions of Netscape interpret any script as Javascript,
    # which causes major problems with the CDATA hack; solve that by making
    # the whole thing look like a Javascript comment.
    text.gsub!('<![CDATA[]]x><!--]]>', '/*\&')
    text.gsub!('<![CDATA[-->]]>', '\&*/')

    # Delete any zero-width word joiners added for XSLT processing.
    text.delete!("\u2060")

    text
  end
end