Class | Bcat::HeadParser |
In: |
lib/bcat/html.rb
|
Parent: | Object |
HEAD_TOKS | = | [ /\A(<!DOCTYPE.*?>)/m, /\A(<title.*?>.*?<\/title>)/mi, /\A(<script.*?>.*?<\/script>)/mi, /\A(<style.*?>.*?<\/style>)/mi, /\A(<(?:html|head|meta|link|base).*?>)/mi, /\A(<\/(?:html|head|meta|link|base|script|style|title)>)/mi, /\A(<!--(.*?)-->)/m ] |
BODY_TOKS | = | [ /\A[^<]/, /\A<(?!html|head|meta|link|base|script|style|title).*?>/ ] |
buf | [RW] |
# File lib/bcat/html.rb, line 8
# Sets up an empty parser: nothing buffered, no head tokens collected,
# no body seen, and HTML-ness not yet determined.
def initialize
  @buf, @head = '', []   # pending input and the head tokens gathered so far
  @body = @html = nil    # both stay nil until parsing decides otherwise
end
The current body contents. The <body> tag is guaranteed to be present. If a <body> was included in the input, it's preserved with original attributes; otherwise, a <body> tag is inserted. The inject argument can be used to insert a string as the immediate descendant of the <body> tag.
# File lib/bcat/html.rb, line 49
# Returns the body contents as a string that always starts with a <body> tag.
#
# When the stored body already opens with a <body ...> tag (optionally
# preceded by whitespace, matched case-insensitively), that tag is kept
# as-is; otherwise a bare "<body>" is supplied. If +inject+ is given it is
# placed directly after the opening tag. The parts are joined with newlines
# and nil parts are dropped.
def body(inject=nil)
  pieces =
    if @body =~ /\A\s*(<body.*?>)(.*)/mi
      # $1 is the original opening tag, $2 everything that follows it.
      [$1, inject, $2]
    else
      ["<body>", inject, @body]
    end
  pieces.compact.join("\n")
end
Determine if the input is HTML. This is nil before the first non-whitespace character is received, true if the first non-whitespace character is a '<', and false if the first non-whitespace character is something other than '<'.
# File lib/bcat/html.rb, line 35
# Reports the HTML-detection state held in @html: nil while undetermined,
# otherwise the true/false value recorded during parsing.
def html?
  @html
end
Parses buf into head and body parts. The basic approach is to consume anything that is possibly head-related (whitespace and head tokens) until we hit text or a body element.
# File lib/bcat/html.rb, line 74
# Splits +buf+ into head and body parts.
#
# Leading whitespace and anything matching HEAD_TOKS is moved out of +buf+
# (which is modified in place) and appended to @head. Parsing stops at the
# first thing that is not a head token.
#
# buf - the String to consume; defaults to @buf. Must be mutable.
#
# Returns nil once the remainder matches one of BODY_TOKS (the remainder is
# then stored in @body). Otherwise returns +buf+ itself: either empty, or
# holding input that cannot yet be classified as head or body.
#
# Side effect: on the first call that sees a non-whitespace character, @html
# is set to true if that character is '<' and false otherwise.
def parse(buf=@buf)
  if @html.nil?
    if buf =~ /\A\s*</
      @html = true
    elsif buf =~ /\A\s*[^<\s]/
      # [^<\s] rather than [^<]: otherwise \s* backtracks and a whitespace
      # character satisfies the class, so whitespace-only input would be
      # recorded as "not HTML" before any real character has been seen.
      @html = false
    end
  end

  until buf.empty?
    # Whitespace between head tokens belongs to the head.
    buf.sub!(/\A(\s+)/m) { @head << $1; '' }
    # sub! returns nil when nothing was replaced, so any? tells us whether
    # some head token was consumed on this pass.
    consumed = HEAD_TOKS.any? { |tok| buf.sub!(tok) { @head << $1; '' } }
    break unless consumed
  end

  return buf if buf.empty?
  return buf unless BODY_TOKS.any? { |tok| buf =~ tok }

  @body = buf
  nil
end