# Nick's web site: xhtml_compat filter. Perform fixups to improve
# XHTML compatibility with various user agents.
#
# Copyright © 2019-2021 Nick Bowler
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
class XhtmlCompatFilter < Nanoc::Filter
  # Text filter that post-processes serialized XHTML: optionally swaps the
  # doctype for one matching the embedded vocabularies, then applies a few
  # textual fixups aimed at legacy user agents.
  identifier :xhtml_compat
  requires 'nokogiri'

  # Namespace prefix bindings used by the XPath queries in fix_doctype.
  Xmlns = {
    math: 'http://www.w3.org/1998/Math/MathML',
    svg: 'http://www.w3.org/2000/svg',
  }.freeze

  # Public identifier of the plain XHTML 1.1 doctype; only documents that
  # declare exactly this identifier have their doctype rewritten.
  XHTMLPublic = '-//W3C//DTD XHTML 1.1//EN'

  # Public/system identifiers for XHTML 1.1 plus MathML 2.0.
  MathPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN'
  MathSystem = 'http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd'

  # Public/system identifiers for XHTML 1.1 plus MathML 2.0 plus SVG 1.1.
  SVGPublic = '-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN'
  SVGSystem = 'http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd'

  # XSLT 1.0 as implemented in Nokogiri cannot construct doctypes based
  # on content. When using MathML or SVG elements in XHTML a different
  # doctype is needed: select one based on which elements are present.
  #
  # content - the document text.
  # params  - filter options; the doctype is only touched when
  #           params[:fix_doctype] is truthy.
  #
  # Returns a new, mutable string. The input is copied verbatim when the
  # fixup is disabled, when the document has no doctype, or when its
  # doctype is not plain XHTML 1.1; otherwise the result is the document
  # as reserialized by Nokogiri (with the doctype replaced if an SVG or
  # MathML element was found; SVG takes precedence over MathML).
  def fix_doctype(content, params = {})
    return content.to_s.dup unless params[:fix_doctype]

    doc = Nokogiri::XML(content)
    doctype = doc.internal_subset

    # internal_subset is nil for documents without a DOCTYPE declaration;
    # there is nothing to replace in that case, so leave the text alone.
    return content.to_s.dup if doctype.nil? || doctype.external_id != XHTMLPublic

    if !doc.xpath('//svg:svg', Xmlns).empty?
      doctype.remove
      doc.create_internal_subset('html', SVGPublic, SVGSystem)
    elsif !doc.xpath('//math:math', Xmlns).empty?
      doctype.remove
      doc.create_internal_subset('html', MathPublic, MathSystem)
    end

    doc.to_xml
  end

  # Filter entry point: apply the optional doctype fixup followed by the
  # textual compatibility fixups, and return the resulting string.
  def run(content, params = {})
    text = fix_doctype(content, params)

    # Old versions of Netscape get confused by <br/> but have no problem
    # with <br />, so avoid that by adding spaces to such elements.
    text.gsub!(%r{([^[:space:]])/>}, '\1 />')

    # Even older versions of Netscape interpret any script as Javascript,
    # which causes major problems with the CDATA hack; solve that by making
    # the whole thing look like a Javascript comment.
    text.gsub!(']]>', '\&*/')

    # Delete any zero-width word joiners added for XSLT processing.
    text.delete! "\u2060"

    # The bang methods above return nil when nothing changed, so return
    # the string itself rather than the value of the last call.
    text
  end
end