# Nick's web site: xhtml_compat filter.  Perform fixups to improve
# XHTML compatibility with various user agents.
#
# Copyright © 2019-2021 Nick Bowler
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Nanoc filter (identifier :xhtml_compat) that post-processes serialized
# XHTML text: it can swap the doctype for documents embedding MathML or
# SVG, and applies textual fixups for old user agents.
class XhtmlCompatFilter < Nanoc::Filter
  identifier :xhtml_compat
  requires 'nokogiri'

  # Namespace prefixes used by the XPath queries in fix_doctype.
  Xmlns = {
    math: 'http://www.w3.org/1998/Math/MathML',
    svg: 'http://www.w3.org/2000/svg',
  }.freeze

  # Public identifier of the plain XHTML 1.1 doctype; only documents
  # declaring exactly this doctype are candidates for replacement.
  XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'

  # Doctype substituted when the document contains MathML (but no SVG).
  MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'
  MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'

  # Doctype substituted when the document contains SVG.
  SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'
  SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'

  # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
  # on content.  When using MathML or SVG elements in XHTML a different
  # doctype is needed: select one based on which elements are present.
  #
  # content - the document text.
  # params  - filter options; the doctype is only examined when
  #           params[:fix_doctype] is truthy.
  #
  # Returns a new string in every case (never +content+ itself), so the
  # caller is free to modify the result in place.  The result is an
  # unmodified copy of +content+ when the fixup is disabled, when the
  # document has no doctype at all, or when its doctype is not XHTML 1.1;
  # otherwise it is the document as re-serialized by Nokogiri.
  def fix_doctype(content, params = {})
    return content.to_s.dup unless params[:fix_doctype]

    doc = Nokogiri::XML(content)
    doctype = doc.internal_subset

    # internal_subset is nil for a document without a DOCTYPE declaration;
    # such a document has nothing to fix, so pass it through rather than
    # failing on nil.
    return content.to_s.dup unless doctype && doctype.external_id == XHTMLPublic

    # SVG takes precedence: its doctype also covers MathML.
    replacement =
      if !doc.xpath('//svg:svg', Xmlns).empty?
        [SVGPublic, SVGSystem]
      elsif !doc.xpath('//math:math', Xmlns).empty?
        [MathPublic, MathSystem]
      end

    if replacement
      doctype.remove
      doc.create_internal_subset('html', *replacement)
    end

    doc.to_xml
  end

  # Filter entry point.
  #
  # content - the document text to process.
  # params  - filter options, passed through to fix_doctype.
  #
  # Returns the processed text.
  def run(content, params = {})
    text = fix_doctype(content, params)

    # Old versions of Netscape get confused by <br/> but have no problem
    # with <br />, so avoid that by adding spaces to such elements.
    text.gsub!(%r{([^[:space:]])/>}m, '\1 />')

    # Even older versions of Netscape interpret any script as Javascript,
    # which causes major problems with the CDATA hack; solve that by making
    # the whole thing look like a Javascript comment.
    text.gsub!(']]>', '\&*/')

    # Delete any zero-width word joiners added for XSLT processing.
    text.delete!("\u2060")

    text
  end
end