1 # Nick's web site: xhtml_compat filter. Perform fixups to improve
2 # XHTML compatibility with various user agents.
4 # Copyright © 2019-2021 Nick Bowler
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <https://www.gnu.org/licenses/>.
19 class XhtmlCompatFilter < Nanoc::Filter
20 identifier :xhtml_compat
24 math: 'http://www.w3.org/1998/Math/MathML',
25 svg: 'http://www.w3.org/2000/svg',
28 XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'
29 MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'
30 MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'
31 SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'
32 SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'
34 # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
35 # on content. When using MathML or SVG elements in XHTML a different
36 # doctype is needed: select one based on which elements are present.
37 def fix_doctype(content, params = {})
38 return "#{content}" if not params[:fix_doctype]
40 doc = Nokogiri::XML(content)
41 doctype = doc.internal_subset
43 return "#{content}" if doctype.external_id != XHTMLPublic
45 if not doc.xpath("//svg:svg", Xmlns).empty?
47 doc.create_internal_subset("html", SVGPublic, SVGSystem)
48 elsif not doc.xpath("//math:math", Xmlns).empty?
50 doc.create_internal_subset("html", MathPublic, MathSystem)
56 def run(content, params = {})
57 text = fix_doctype(content, params)
59 # Old versions of Netscape get confused by <hr/> but have no problem
60 # with <hr />, so avoid that by adding spaces to such elements.
61 text.gsub!(/([^[:space:]])\/>/m, '\1 />');
63 # Even older versions of Netscape interpret any script as Javascript,
64 # which causes major problems with the CDATA hack; solve that by making
65 # the whole thing look like a Javascript comment.
66 text.gsub!("<![CDATA[]]x><!--]]>", '/*\&')
67 text.gsub!("<![CDATA[-->]]>", '\&*/')
69 # Delete any zero-width word joiners added for XSLT processing.