X-Git-Url: https://git.draconx.ca/gitweb/homepage.git/blobdiff_plain/5aa756ee68dc2134b6acac8a66af2f1c14521721..0015d84bea1204b4534e5568ff7c0920b9ef02b7:/lib/xhtml-compat.rb diff --git a/lib/xhtml-compat.rb b/lib/xhtml-compat.rb index 25fe2e2..c110502 100644 --- a/lib/xhtml-compat.rb +++ b/lib/xhtml-compat.rb @@ -1,7 +1,7 @@ -# Nick's web site: xhtml_compat filter. Add whitespace before the end -# of empty element tags to improve compatibility with old browsers. +# Nick's web site: xhtml_compat filter. Perform fixups to improve +# XHTML compatibility with various user agents. # -# Copyright © 2020 Nick Bowler +# Copyright © 2019-2021 Nick Bowler # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,11 +18,57 @@ class XhtmlCompatFilter < Nanoc::Filter identifier :xhtml_compat + requires 'nokogiri' + + Xmlns = { + math: 'http://www.w3.org/1998/Math/MathML', + svg: 'http://www.w3.org/2000/svg', + }.freeze + + XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN' + MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN' + MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd' + SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN' + SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd' + + # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based + # on content. When using MathML or SVG elements in XHTML a different + # doctype is needed: select one based on which elements are present. + def fix_doctype(content, params = {}) + return "#{content}" if not params[:fix_doctype] + + doc = Nokogiri::XML(content) + doctype = doc.internal_subset + + return "#{content}" if doctype.external_id != XHTMLPublic + + if not doc.xpath("//svg:svg", Xmlns).empty? + doctype.remove + doc.create_internal_subset("html", SVGPublic, SVGSystem) + elsif not doc.xpath("//math:math", Xmlns).empty? 
+ doctype.remove + doc.create_internal_subset("html", MathPublic, MathSystem) + end + + return doc.to_xml + end def run(content, params = {}) - text = content.gsub(/([^[:space:]])\/>/m, '\1 />'); + text = fix_doctype(content, params) + + # Old versions of Netscape get confused by <br/> but have no problem + # with <br />, so avoid that by adding spaces to such elements. + text.gsub!(/([^[:space:]])\/>/m, '\1 />'); + + # Even older versions of Netscape interpret any script as Javascript, + # which causes major problems with the CDATA hack; solve that by making + # the whole thing look like a Javascript comment. text.gsub!("]]>", '\&*/') + + # Delete any zero-width word joiners added for XSLT processing. + text.delete! "\u2060" + return text end end